diff --git a/03-numpy/03.01-numpy-overview.ipynb b/03-numpy/03.01-numpy-overview.ipynb index cdc2c810..e8e1ad94 100644 --- a/03-numpy/03.01-numpy-overview.ipynb +++ b/03-numpy/03.01-numpy-overview.ipynb @@ -55,9 +55,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -89,9 +87,7 @@ { "cell_type": "code", "execution_count": 3, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "ename": "TypeError", @@ -120,9 +116,7 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -150,9 +144,7 @@ { "cell_type": "code", "execution_count": 5, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -179,9 +171,7 @@ { "cell_type": "code", "execution_count": 6, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -209,9 +199,7 @@ { "cell_type": "code", "execution_count": 7, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -238,9 +226,7 @@ { "cell_type": "code", "execution_count": 8, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -274,9 +260,7 @@ { "cell_type": "code", "execution_count": 9, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -303,9 +287,7 @@ { "cell_type": "code", "execution_count": 10, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -332,9 +314,7 @@ { "cell_type": "code", "execution_count": 11, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -361,9 +341,7 @@ { "cell_type": "code", "execution_count": 12, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -397,9 +375,7 @@ { "cell_type": "code", "execution_count": 13, - "metadata": { - "collapsed": false - }, + "metadata": {}, 
"outputs": [ { "data": { @@ -426,9 +402,7 @@ { "cell_type": "code", "execution_count": 14, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -465,7 +439,6 @@ "cell_type": "code", "execution_count": 15, "metadata": { - "collapsed": false, "scrolled": true }, "outputs": [ @@ -495,9 +468,7 @@ { "cell_type": "code", "execution_count": 16, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -532,9 +503,7 @@ { "cell_type": "code", "execution_count": 17, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -565,9 +534,7 @@ { "cell_type": "code", "execution_count": 18, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -599,9 +566,7 @@ { "cell_type": "code", "execution_count": 19, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -646,9 +611,7 @@ { "cell_type": "code", "execution_count": 20, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -688,9 +651,7 @@ { "cell_type": "code", "execution_count": 22, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -720,23 +681,23 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.6.5" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } diff --git a/docs/.nojekyll b/docs/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/docs/01-python-tools/01.01-python-overview.md b/docs/01-python-tools/01.01-python-overview.md new file mode 100644 index 
00000000..18ad8062 --- /dev/null +++ b/docs/01-python-tools/01.01-python-overview.md @@ -0,0 +1,133 @@ + +# Python 简介 + +## **Python** 历史 + +`Python` 的创始人为荷兰人吉多·范罗苏姆(`Guido van Rossum`)。1989年的圣诞节期间,吉多·范罗苏姆为了在阿姆斯特丹打发时间,决心开发一个新的脚本解释程序,作为 ABC 语言的一种继承。之所以选中 `Python` 作为程序的名字,是因为他是 BBC 电视剧——蒙提·派森的飞行马戏团(`Monty Python's Flying Circus`)的爱好者。 + +1991年,第一个 Python 解释器诞生。它是用C语言实现的,并能够调用C语言的库文件。 + +`Python 2.0` 于 2000 年 10 月 16 日发布,增加了实现完整的垃圾回收,并且支持 `Unicode`。 + +`Python 3.0` 于 2008 年 12 月 3 日发布,此版不完全兼容之前的 `Python` 源代码。不过,很多新特性后来也被移植到旧的 `Python 2.6/2.7` 版本。 + +## 第一行Python代码 + +安装好 `Python` 之后,在命令行下输入: + + python + +就可以进入 `Python` 解释器的页面。 + +按照惯例,第一行代码应该是输出 `"hello world!"`: + + +```python +print "hello world!" +``` + + hello world! + + +相对于 `Java,C` 等语言,`Python` 仅仅使用一行语句就完成了这个任务。 + +可以将这句话的内容保存到一个文本文件中,并使用后缀名 `.py` 结尾,例如 `hello_world.py`,在命令行下运行这个程序: + + python hello_world.py + +也会输出 `"hello world!"` 的结果。 + +## Python 之禅 + +在 **Python** 解释器下输入 + +```import this``` + +会出来这样一首小诗: + + +```python +import this +``` + + The Zen of Python, by Tim Peters + + Beautiful is better than ugly. + Explicit is better than implicit. + Simple is better than complex. + Complex is better than complicated. + Flat is better than nested. + Sparse is better than dense. + Readability counts. + Special cases aren't special enough to break the rules. + Although practicality beats purity. + Errors should never pass silently. + Unless explicitly silenced. + In the face of ambiguity, refuse the temptation to guess. + There should be one-- and preferably only one --obvious way to do it. + Although that way may not be obvious at first unless you're Dutch. + Now is better than never. + Although never is often better than *right* now. + If the implementation is hard to explain, it's a bad idea. + If the implementation is easy to explain, it may be a good idea. + Namespaces are one honking great idea -- let's do more of those! 
+ + +这首诗反映了**Python**的设计哲学——**Python**是一种追求优雅,明确,简单的编程语言,但事实上,产生这首诗的代码并没有写的那么简单易懂: + + +```python +s = """Gur Mra bs Clguba, ol Gvz Crgref + +Ornhgvshy vf orggre guna htyl. +Rkcyvpvg vf orggre guna vzcyvpvg. +Fvzcyr vf orggre guna pbzcyrk. +Pbzcyrk vf orggre guna pbzcyvpngrq. +Syng vf orggre guna arfgrq. +Fcnefr vf orggre guna qrafr. +Ernqnovyvgl pbhagf. +Fcrpvny pnfrf nera'g fcrpvny rabhtu gb oernx gur ehyrf. +Nygubhtu cenpgvpnyvgl orngf chevgl. +Reebef fubhyq arire cnff fvyragyl. +Hayrff rkcyvpvgyl fvyraprq. +Va gur snpr bs nzovthvgl, ershfr gur grzcgngvba gb thrff. +Gurer fubhyq or bar-- naq cersrenoyl bayl bar --boivbhf jnl gb qb vg. +Nygubhtu gung jnl znl abg or boivbhf ng svefg hayrff lbh'er Qhgpu. +Abj vf orggre guna arire. +Nygubhtu arire vf bsgra orggre guna *evtug* abj. +Vs gur vzcyrzragngvba vf uneq gb rkcynva, vg'f n onq vqrn. +Vs gur vzcyrzragngvba vf rnfl gb rkcynva, vg znl or n tbbq vqrn. +Anzrfcnprf ner bar ubaxvat terng vqrn -- yrg'f qb zber bs gubfr!""" + +d = {} +for c in (65, 97): + for i in range(26): + d[chr(i+c)] = chr((i+13) % 26 + c) + +print "".join([d.get(c, c) for c in s]) +``` + + The Zen of Python, by Tim Peters + + Beautiful is better than ugly. + Explicit is better than implicit. + Simple is better than complex. + Complex is better than complicated. + Flat is better than nested. + Sparse is better than dense. + Readability counts. + Special cases aren't special enough to break the rules. + Although practicality beats purity. + Errors should never pass silently. + Unless explicitly silenced. + In the face of ambiguity, refuse the temptation to guess. + There should be one-- and preferably only one --obvious way to do it. + Although that way may not be obvious at first unless you're Dutch. + Now is better than never. + Although never is often better than *right* now. + If the implementation is hard to explain, it's a bad idea. + If the implementation is easy to explain, it may be a good idea. 
+ Namespaces are one honking great idea -- let's do more of those! + + +> Life is short. Use Python. diff --git a/docs/01-python-tools/01.02-ipython-interpreter.md b/docs/01-python-tools/01.02-ipython-interpreter.md new file mode 100644 index 00000000..0db09912 --- /dev/null +++ b/docs/01-python-tools/01.02-ipython-interpreter.md @@ -0,0 +1,341 @@ + +# Ipython 解释器 + +## 进入ipython + +通常我们并不使用**Python**自带的解释器,而是使用另一个比较方便的解释器——**ipython**解释器,命令行下输入: + + ipython + +即可进入**ipython**解释器。 + +所有在**python**解释器下可以运行的代码都可以在**ipython**解释器下运行: + + +```python +print "hello, world" +``` + + hello, world + + +可以进行简单赋值操作: + + +```python +a = 1 +``` + +直接在解释器中输入变量名,会显示变量的值(不需要加`print`): + + +```python +a +``` + + + + + 1 + + + + +```python +b = [1, 2, 3] +``` + +## ipython magic命令 + +**ipython**解释器提供了很多以百分号`%`开头的`magic`命令,这些命令很像linux系统下的命令行命令(事实上有些是一样的)。 + +查看所有的`magic`命令: + + +```python +%lsmagic +``` + + + + + Available line magics: + %alias %alias_magic %autocall %automagic %autosave %bookmark %cd %clear %cls %colors %config %connect_info %copy %ddir %debug %dhist %dirs %doctest_mode %echo %ed %edit %env %gui %hist %history %install_default_config %install_ext %install_profiles %killbgscripts %ldir %less %load %load_ext %loadpy %logoff %logon %logstart %logstate %logstop %ls %lsmagic %macro %magic %matplotlib %mkdir %more %notebook %page %pastebin %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %popd %pprint %precision %profile %prun %psearch %psource %pushd %pwd %pycat %pylab %qtconsole %quickref %recall %rehashx %reload_ext %ren %rep %rerun %reset %reset_selective %rmdir %run %save %sc %set_env %store %sx %system %tb %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode + + Available cell magics: + %%! %%HTML %%SVG %%bash %%capture %%cmd %%debug %%file %%html %%javascript %%latex %%perl %%prun %%pypy %%python %%python2 %%python3 %%ruby %%script %%sh %%svg %%sx %%system %%time %%timeit %%writefile + + Automagic is ON, % prefix IS NOT needed for line magics. 
+ + + +`line magic` 以一个百分号开头,作用于一行; + +`cell magic` 以两个百分号开头,作用于整个cell。 + +最后一行`Automagic is ON, % prefix IS NOT needed for line magics.`说明在此时即使不加上`%`也可以使用这些命令。 + +使用 `whos` 查看当前的变量空间: + + +```python +%whos +``` + + Variable Type Data/Info + ---------------------------- + a int 1 + b list n=3 + + +使用 `reset` 重置当前变量空间: + + +```python +%reset -f +``` + +再查看当前变量空间: + + +```python +%whos +``` + + Interactive namespace is empty. + + +使用 `pwd` 查看当前工作文件夹: + + +```python +%pwd +``` + + + + + u'C:\\Users\\lijin\\Documents\\Git\\python-tutorial\\01. python tools' + + + +使用 `mkdir` 产生新文件夹: + + +```python +%mkdir demo_test +``` + +使用 `cd` 改变工作文件夹: + + +```python +%cd demo_test/ +``` + + C:\Users\lijin\Documents\Git\python-tutorial\01. python tools\demo_test + + +使用 `writefile` 将cell中的内容写入文件: + + +```python +%%writefile hello_world.py +print "hello world" +``` + + Writing hello_world.py + + +使用 `ls` 查看当前工作文件夹的文件: + + +```python +%ls +``` + + 驱动器 C 中的卷是 System + 卷的序列号是 DC4B-D785 + + C:\Users\lijin\Documents\Git\python-tutorial\01. python tools\demo_test 的目录 + + 2015/09/18 11:32 <DIR> . + 2015/09/18 11:32 <DIR> .. + 2015/09/18 11:32 19 hello_world.py + 1 个文件 19 字节 + 2 个目录 121,763,831,808 可用字节 + + +使用 `run` 命令来运行这个代码: + + +```python +%run hello_world.py +``` + + hello world + + +删除这个文件: + + +```python +import os +os.remove('hello_world.py') +``` + +查看当前文件夹,`hello_world.py` 已被删除: + + +```python +%ls +``` + + 驱动器 C 中的卷是 System + 卷的序列号是 DC4B-D785 + + C:\Users\lijin\Documents\Git\python-tutorial\01. python tools\demo_test 的目录 + + 2015/09/18 11:32 <DIR> . + 2015/09/18 11:32 <DIR> .. + 0 个文件 0 字节 + 2 个目录 121,763,831,808 可用字节 + + +返回上一层文件夹: + + +```python +%cd .. +``` + + C:\Users\lijin\Documents\Git\python-tutorial\01. 
python tools + + +使用 `rmdir` 删除文件夹: + + +```python +%rmdir demo_test +``` + +使用 `hist` 查看历史命令: + + +```python +%hist +``` + + print "hello, world" + a = 1 + a + b = [1, 2, 3] + %lsmagic + %whos + %reset -f + %whos + %pwd + %mkdir demo_test + %cd demo_test/ + %%writefile hello_world.py + print "hello world" + %ls + %run hello_world.py + import os + os.remove('hello_world.py') + %ls + %cd .. + %rmdir demo_test + %hist + + +## ipython 使用 + +使用 `?` 查看函数的帮助: + + +```python +sum? +``` + +使用 `??` 查看函数帮助和函数源代码(如果是用**python**实现的): + + +```python +# 导入numpy和matplotlib两个包 +%pylab +# 查看其中sort函数的帮助 +sort?? +``` + + Using matplotlib backend: Qt4Agg + Populating the interactive namespace from numpy and matplotlib + + +**ipython** 支持使用 `<tab>` 键自动补全命令。 + +使用 `_` 引用上个cell的输出结果: + + +```python +a = 12 +a +``` + + + + + 12 + + + + +```python +_ + 13 +``` + + + + + 25 + + + +可以使用 `!` 来执行一些系统命令。 + + +```python +!ping baidu.com +``` + + + 正在 Ping baidu.com [180.149.132.47] 具有 32 字节的数据: + 来自 180.149.132.47 的回复: 字节=32 时间=69ms TTL=49 + 来自 180.149.132.47 的回复: 字节=32 时间=64ms TTL=49 + 来自 180.149.132.47 的回复: 字节=32 时间=61ms TTL=49 + 来自 180.149.132.47 的回复: 字节=32 时间=63ms TTL=49 + + 180.149.132.47 的 Ping 统计信息: + 数据包: 已发送 = 4,已接收 = 4,丢失 = 0 (0% 丢失), + 往返行程的估计时间(以毫秒为单位): + 最短 = 61ms,最长 = 69ms,平均 = 64ms + + +当输入出现错误时,**ipython**会指出出错的位置和原因: + + +```python +1 + "hello" +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + ----> 1 1 + "hello" + + + TypeError: unsupported operand type(s) for +: 'int' and 'str' + diff --git a/docs/01-python-tools/01.03-ipython-notebook.md b/docs/01-python-tools/01.03-ipython-notebook.md new file mode 100644 index 00000000..a9602916 --- /dev/null +++ b/docs/01-python-tools/01.03-ipython-notebook.md @@ -0,0 +1,15 @@ + +# Ipython notebook + +在命令行下输入命令: + + ipython notebook + +会打开一个notebook本地服务器,一般地址是 http://localhost:8888 + +**`ipython notebook`** 支持两种模式的cell: + +* Markdown +* Code + 
+这里不做过多介绍。 diff --git a/docs/01-python-tools/01.04-use-anaconda.md b/docs/01-python-tools/01.04-use-anaconda.md new file mode 100644 index 00000000..7581e056 --- /dev/null +++ b/docs/01-python-tools/01.04-use-anaconda.md @@ -0,0 +1,235 @@ + +# 使用 Anaconda + +[Anaconda](http://www.continuum.io/downloads)是一个很好用的Python IDE,它集成了很多科学计算需要使用的**python**第三方工具包。 + +## conda 的使用 + +根据自己的操作系统安装好[Anaconda](http://www.continuum.io/downloads)后,在命令行下输入: + + conda list + +可以看已经安装好的**python**第三方工具包,这里我们使用 `magic` 命令 `%%cmd` 在 `ipython cell` 中来执行这个命令: + + +```python +!conda list +``` + + # packages in environment at C:\Anaconda: + # + _license 1.1 py27_0 + alabaster 0.7.3 py27_0 + anaconda 2.3.0 np19py27_0 + argcomplete 0.8.9 py27_0 + astropy 1.0.3 np19py27_0 + babel 1.3 py27_0 + backports.ssl-match-hostname 3.4.0.2 + basemap 1.0.7 np19py27_0 + bcolz 0.9.0 np19py27_0 + beautiful-soup 4.3.2 py27_1 + beautifulsoup4 4.3.2 + binstar 0.11.0 py27_0 + bitarray 0.8.1 py27_1 + blaze 0.8.0 + blaze-core 0.8.0 np19py27_0 + blz 0.6.2 np19py27_1 + bokeh 0.9.0 np19py27_0 + boto 2.38.0 py27_0 + bottleneck 1.0.0 np19py27_0 + cartopy 0.13.0 np19py27_0 + cdecimal 2.3 py27_1 + certifi 14.05.14 py27_0 + cffi 1.1.0 py27_0 + clyent 0.3.4 py27_0 + colorama 0.3.3 py27_0 + conda 3.17.0 py27_0 + conda-build 1.14.1 py27_0 + conda-env 2.4.2 py27_0 + configobj 5.0.6 py27_0 + cryptography 0.9.1 py27_0 + cython 0.22.1 py27_0 + cytoolz 0.7.3 py27_0 + datashape 0.4.5 np19py27_0 + decorator 3.4.2 py27_0 + docutils 0.12 py27_1 + dynd-python 0.6.5 np19py27_0 + enum34 1.0.4 py27_0 + fastcache 1.0.2 py27_0 + flask 0.10.1 py27_1 + funcsigs 0.4 py27_0 + geopy 1.11.0 + geos 3.4.2 3 + gevent 1.0.1 py27_0 + gevent-websocket 0.9.3 py27_0 + greenlet 0.4.7 py27_0 + grin 1.2.1 py27_2 + h5py 2.5.0 np19py27_1 + hdf5 1.8.15.1 2 + idna 2.0 py27_0 + ipaddress 1.0.7 py27_0 + ipython 3.2.0 py27_0 + ipython-notebook 3.2.0 py27_0 + ipython-qtconsole 3.2.0 py27_0 + itsdangerous 0.24 py27_0 + jdcal 1.0 py27_0 + jedi 0.8.1 py27_0 + jinja2 
2.7.3 py27_2 + jsonschema 2.4.0 py27_0 + launcher 1.0.0 1 + libpython 1.0 py27_1 + llvmlite 0.5.0 py27_0 + lxml 3.4.4 py27_0 + markupsafe 0.23 py27_0 + matplotlib 1.4.3 np19py27_1 + menuinst 1.0.4 py27_0 + mingw 4.7 1 + mistune 0.5.1 py27_1 + mock 1.3.0 py27_0 + multipledispatch 0.4.7 py27_0 + networkx 1.9.1 py27_0 + nltk 3.0.3 np19py27_0 + node-webkit 0.10.1 0 + nose 1.3.7 py27_0 + numba 0.19.1 np19py27_0 + numexpr 2.4.3 np19py27_0 + numpy 1.9.2 py27_0 + odo 0.3.2 np19py27_0 + openpyxl 1.8.5 py27_0 + owslib 0.9.0 py27_0 + pandas 0.16.2 np19py27_0 + patsy 0.3.0 np19py27_0 + pbr 1.3.0 py27_0 + pep8 1.6.2 py27_0 + pillow 2.9.0 py27_0 + pip 7.1.2 py27_0 + ply 3.6 py27_0 + proj4 4.9.1 py27_1 + psutil 2.2.1 py27_0 + py 1.4.27 py27_0 + pyasn1 0.1.7 py27_0 + pycosat 0.6.1 py27_0 + pycparser 2.14 py27_0 + pycrypto 2.6.1 py27_3 + pyepsg 0.2.0 py27_0 + pyflakes 0.9.2 py27_0 + pygments 2.0.2 py27_0 + pyopenssl 0.15.1 py27_1 + pyparsing 2.0.3 py27_0 + pyqt 4.10.4 py27_1 + pyreadline 2.0 py27_0 + pyshp 1.2.1 py27_0 + pytables 3.2.0 np19py27_0 + pytest 2.7.1 py27_0 + python 2.7.10 0 + python-dateutil 2.4.2 py27_0 + pytz 2015.4 py27_0 + pywin32 219 py27_0 + pyyaml 3.11 py27_2 + pyzmq 14.7.0 py27_0 + requests 2.7.0 py27_0 + rope 0.9.4 py27_1 + runipy 0.1.3 py27_0 + scikit-image 0.11.3 np19py27_0 + scikit-learn 0.16.1 np19py27_0 + scipy 0.16.0 np19py27_0 + setuptools 18.1 py27_0 + shapely 1.5.11 nppy27_0 + six 1.9.0 py27_0 + snowballstemmer 1.2.0 py27_0 + sockjs-tornado 1.0.1 py27_0 + sphinx 1.3.1 py27_0 + sphinx-rtd-theme 0.1.7 + sphinx_rtd_theme 0.1.7 py27_0 + spyder 2.3.5.2 py27_0 + spyder-app 2.3.5.2 py27_0 + sqlalchemy 1.0.5 py27_0 + ssl_match_hostname 3.4.0.2 py27_0 + statsmodels 0.6.1 np19py27_0 + sympy 0.7.6 py27_0 + tables 3.2.0 + theano 0.7.0 + toolz 0.7.2 py27_0 + tornado 4.2 py27_0 + ujson 1.33 py27_0 + unicodecsv 0.9.4 py27_0 + werkzeug 0.10.4 py27_0 + wheel 0.24.0 py27_0 + xlrd 0.9.3 py27_0 + xlsxwriter 0.7.3 py27_0 + xlwings 0.3.5 py27_0 + xlwt 1.0.0 py27_0 + zlib 
1.2.8 0 + + +第一次安装好 [Anaconda](http://www.continuum.io/downloads) 以后,可以在命令行输入以下命令使 [Anaconda](http://www.continuum.io/downloads) 保持最新: + + conda update conda + conda update anaconda + +conda 是一种很强大的工具,具体用法可以参照它的[文档](http://conda.pydata.org/docs/)。 + +也可以参考它的 [cheat sheet](http://conda.pydata.org/docs/_downloads/conda-cheatsheet.pdf) 来快速查看它的用法。 + +可以使用它来安装,更新,卸载第三方的 **python** 工具包: + + conda install + conda update + conda remove + +在安装或更新时可以指定安装的版本号,例如需要使用 `numpy 1.8.1`: + + conda install numpy=1.8.1 + conda update numpy=1.8.1 + +查看 `conda` 的信息: + + conda info + + +```python +!conda info +``` + + Current conda install: + + platform : win-64 + conda version : 3.17.0 + conda-build version : 1.14.1 + python version : 2.7.10.final.0 + requests version : 2.7.0 + root environment : C:\Anaconda (writable) + default environment : C:\Anaconda + envs directories : C:\Anaconda\envs + package cache : C:\Anaconda\pkgs + channel URLs : https://repo.continuum.io/pkgs/free/win-64/ + https://repo.continuum.io/pkgs/free/noarch/ + https://repo.continuum.io/pkgs/pro/win-64/ + https://repo.continuum.io/pkgs/pro/noarch/ + config file : None + is foreign system : False + + + +一个很棒的功能是 `conda` 可以产生一个自定义的环境,假设在安装的是 **Python 2.7** 的情况下,想使用 **Python 3.4**,只需要在命令行下使用 `conda` 产生一个新的环境: + + conda create -n py34 python=3.4 + +这里这个环境被命名为 `py34` ,可以根据喜好将 `py34` 改成其他的名字。 + +使用这个环境时,只需要命令行下输入: + +``` python +activate py34 #(windows) +source activate py34 #(linux, mac) +``` + +此时,我们的 **Python** 版本便是 **`python 3.4`**了。 + +## spyder 编辑器 + +`Anaconda` 默认使用的编辑器是 `spyder`,可以在命令行下输入: + + spyder + +来进入这个编辑器,具体使用方法不做介绍。 diff --git a/docs/02-python-essentials/02.01-a-tour-of-python.md b/docs/02-python-essentials/02.01-a-tour-of-python.md new file mode 100644 index 00000000..21d0de7d --- /dev/null +++ b/docs/02-python-essentials/02.01-a-tour-of-python.md @@ -0,0 +1,939 @@ + +# Python 入门演示 + +## 简单的数学运算 + +整数相加,得到整数: + + +```python +2 + 2 +``` + + + + + 4 + + + +浮点数相加,得到浮点数: + + +```python +2.0 + 2.5 +``` + + + 
+ + 4.5 + + + +整数和浮点数相加,得到浮点数: + + +```python +2 + 2.5 +``` + + + + + 4.5 + + + +## 变量赋值 + +**Python**使用`<变量名>=<表达式>`的方式对变量进行赋值 + + +```python +a = 0.2 +``` + +## 字符串 String + +字符串的生成,单引号与双引号是等价的: + + +```python +s = "hello world" +s +``` + + + + + 'hello world' + + + + +```python +s = 'hello world' +s +``` + + + + + 'hello world' + + + +三引号用来输入包含多行文字的字符串: + + +```python +s = """hello +world""" +print s +``` + + hello + world + + + +```python +s = '''hello +world''' +print s +``` + + hello + world + + +字符串的加法: + + +```python +s = "hello" + " world" +s +``` + + + + + 'hello world' + + + +字符串索引: + + + +```python +s[0] +``` + + + + + 'h' + + + + +```python +s[-1] +``` + + + + + 'd' + + + + +```python +s[0:5] +``` + + + + + 'hello' + + + +字符串的分割: + + +```python +s = "hello world" +s.split() +``` + + + + + ['hello', 'world'] + + + +查看字符串的长度: + + +```python +len(s) +``` + + + + + 11 + + + +## 列表 List + +Python用`[]`来生成列表 + + +```python +a = [1, 2.0, 'hello', 5 + 1.0] +a +``` + + + + + [1, 2.0, 'hello', 6.0] + + + +列表加法: + + +```python +a + a +``` + + + + + [1, 2.0, 'hello', 6.0, 1, 2.0, 'hello', 6.0] + + + +列表索引: + + +```python +a[1] +``` + + + + + 2.0 + + + +列表长度: + + +```python +len(a) +``` + + + + + 4 + + + +向列表中添加元素: + + +```python +a.append("world") +a +``` + + + + + [1, 2.0, 'hello', 6.0, 'world'] + + + +## 集合 Set + +Python用{}来生成集合,集合中不含有相同元素。 + + +```python +s = {2, 3, 4, 2} +s +``` + + + + + {2, 3, 4} + + + +集合的长度: + + +```python +len(s) +``` + + + + + 3 + + + +向集合中添加元素: + + +```python +s.add(1) +s +``` + + + + + {1, 2, 3, 4} + + + +集合的交: + + +```python +a = {1, 2, 3, 4} +b = {2, 3, 4, 5} +a & b +``` + + + + + {2, 3, 4} + + + +并: + + +```python +a | b +``` + + + + + {1, 2, 3, 4, 5} + + + +差: + + +```python +a - b +``` + + + + + {1} + + + +对称差: + + +```python +a ^ b +``` + + + + + {1, 5} + + + +## 字典 Dictionary + +Python用`{key:value}`来生成Dictionary。 + + +```python +d = {'dogs':5, 'cats':4} +d +``` + + + + + {'cats': 4, 'dogs': 5} + + + +字典的大小 + + +```python +len(d) 
+``` + + + + + 2 + + + +查看字典某个键对应的值: + + +```python +d["dogs"] +``` + + + + + 5 + + + +修改键值: + + +```python +d["dogs"] = 2 +d +``` + + + + + {'cats': 4, 'dogs': 2} + + + +插入键值: + + +```python +d["pigs"] = 7 +d +``` + + + + + {'cats': 4, 'dogs': 2, 'pigs': 7} + + + +所有的键: + + +```python +d.keys() +``` + + + + + ['cats', 'dogs', 'pigs'] + + + +所有的值: + + +```python +d.values() +``` + + + + + [4, 2, 7] + + + +所有的键值对: + + +```python +d.items() +``` + + + + + [('cats', 4), ('dogs', 2), ('pigs', 7)] + + + +## 数组 Numpy Arrays + +需要先导入需要的包,Numpy数组可以进行很多列表不能进行的运算。 + + +```python +from numpy import array +a = array([1, 2, 3, 4]) +a +``` + + + + + array([1, 2, 3, 4]) + + + +加法: + + +```python +a + 2 +``` + + + + + array([3, 4, 5, 6]) + + + + +```python +a + a +``` + + + + + array([2, 4, 6, 8]) + + + +## 画图 Plot + +Python提供了一个很像MATLAB的绘图接口。 + + +```python +%matplotlib inline +from matplotlib.pyplot import plot +plot(a, a**2) +``` + + + + + [] + + + + +![png](output_79_1.png) + + +## 循环 Loop + + +```python +line = '1 2 3 4 5' +fields = line.split() +fields +``` + + + + + ['1', '2', '3', '4', '5'] + + + + +```python +total = 0 +for field in fields: + total += int(field) +total +``` + + + + + 15 + + + +Python中有一种叫做列表推导式(List comprehension)的用法: + + +```python +numbers = [int(field) for field in fields] +numbers +``` + + + + + [1, 2, 3, 4, 5] + + + + +```python +sum(numbers) +``` + + + + + 15 + + + +写在一行: + + +```python +sum([int(field) for field in line.split()]) +``` + + + + + 15 + + + +## 文件操作 File IO + + +```python +cd ~ +``` + + d:\Users\lijin + + +写文件: + + +```python +f = open('data.txt', 'w') +f.write('1 2 3 4\n') +f.write('2 3 4 5\n') +f.close() +``` + +读文件: + + +```python +f = open('data.txt') +data = [] +for line in f: + data.append([int(field) for field in line.split()]) +f.close() +data + +``` + + + + + [[1, 2, 3, 4], [2, 3, 4, 5]] + + + + +```python +for row in data: + print row +``` + + [1, 2, 3, 4] + [2, 3, 4, 5] + + +删除文件: + + +```python +import os 
+os.remove('data.txt') +``` + +## 函数 Function + +Python用关键词`def`来定义函数。 + + +```python +def poly(x, a, b, c): + y = a * x ** 2 + b * x + c + return y + +x = 1 +poly(x, 1, 2, 3) +``` + + + + + 6 + + + +用Numpy数组做参数x: + + +```python +x = array([1, 2, 3]) +poly(x, 1, 2, 3) +``` + + + + + array([ 6, 11, 18]) + + + +可以在定义时指定参数的默认值: + + +```python +from numpy import arange + +def poly(x, a = 1, b = 2, c = 3): + y = a*x**2 + b*x + c + return y + +x = arange(10) +x +array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) +``` + + + + + array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + + + + +```python +poly(x) +``` + + + + + array([ 3, 6, 11, 18, 27, 38, 51, 66, 83, 102]) + + + + +```python +poly(x, b = 1) +``` + + + + + array([ 3, 5, 9, 15, 23, 33, 45, 59, 75, 93]) + + + +## 模块 Module + +Python中使用`import`关键词来导入模块。 + + +```python +import os +``` + +当前进程号: + + +```python +os.getpid() +``` + + + + + 4400 + + + +系统路径分隔符: + + +```python +os.sep +``` + + + + + '\\' + + + +## 类 Class + +用`class`来定义一个类。 +`Person(object)`表示继承自`object`类; +`__init__`函数用来初始化对象; +`self`表示对象自身,类似于 `C++`、`Java` 里面的 `this`。 + + +```python +class Person(object): + def __init__(self, first, last, age): + self.first = first + self.last = last + self.age = age + def full_name(self): + return self.first + ' ' + self.last +``` + +构建新对象: + + +```python +person = Person('Mertle', 'Sedgewick', 52) +``` + +调用对象的属性: + + +```python +person.first +``` + + + + + 'Mertle' + + + +调用对象的方法: + + +```python +person.full_name() +``` + + + + + 'Mertle Sedgewick' + + + +修改对象的属性: + + +```python +person.last = 'Smith' +``` + +添加新属性,d是之前定义的字典: + + +```python +person.critters = d +person.critters +``` + + + + + {'cats': 4, 'dogs': 2, 'pigs': 7} + + + +## 网络数据 Data from Web + + +```python +url = 'http://ichart.finance.yahoo.com/table.csv?s=GE&d=10&e=5&f=2013&g=d&a=0&b=2&c=1962&ignore=.csv' +``` + +处理后就相当于一个可读文件: + + +```python +import urllib2 +ge_csv = urllib2.urlopen(url) +data = [] +for line in ge_csv: + data.append(line.split(',')) +data[:4] +``` + + + + + 
[['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close\n'], + ['2013-11-05', '26.32', '26.52', '26.26', '26.42', '24897500', '24.872115\n'], + ['2013-11-04', + '26.59', + '26.59', + '26.309999', + '26.43', + '28166100', + '24.88153\n'], + ['2013-11-01', + '26.049999', + '26.639999', + '26.030001', + '26.540001', + '55634500', + '24.985086\n']] + + + +使用`pandas`处理数据: + + +```python +ge_csv = urllib2.urlopen(url) +import pandas +ge = pandas.read_csv(ge_csv, index_col=0, parse_dates=True) +ge.plot(y='Adj Close') +``` + + + + + + + + + +![png](output_131_1.png) + diff --git a/docs/02-python-essentials/02.02-python-data-types.md b/docs/02-python-essentials/02.02-python-data-types.md new file mode 100644 index 00000000..07eea6d9 --- /dev/null +++ b/docs/02-python-essentials/02.02-python-data-types.md @@ -0,0 +1,24 @@ + +# Python 数据类型 + +## 常用数据类型 Common Data Types + +| 类型| 例子| +| ----- | ----- | +| 整数 | `-100` | +| 浮点数 | `3.1416` | +| 字符串 | `'hello'` | +| 列表 | `[1, 1.2, 'hello']` | +| 字典 | `{'dogs': 5, 'pigs': 3}`| +| Numpy数组 | `array([1, 2, 3])` + +## 其他类型 Others + +| 类型| 例子| +| ------- | ----- | +| 长整型 | `1000000000000L` +| 布尔型 | `True, False` +| 元组 | `('ring', 1000)` +| 集合 | `{1, 2, 3}` +| Pandas类型| `DataFrame, Series` +| 自定义 | `Object Oriented Classes` diff --git a/docs/02-python-essentials/02.03-numbers.md b/docs/02-python-essentials/02.03-numbers.md new file mode 100644 index 00000000..7bc65966 --- /dev/null +++ b/docs/02-python-essentials/02.03-numbers.md @@ -0,0 +1,737 @@ + +# 数字 + +## 整型 Integers + +整型运算,加减乘: + + +```python +2 + 2 +``` + + + + + 4 + + + + +```python +3 - 4 +``` + + + + + -1 + + + + +```python +4 * 5 +``` + + + + + 20 + + + +在**Python 2.7**中,整型的运算结果只能返回整型,**除法**的结果也不例外。 + +例如`12 / 5`返回的结果并不是2.4,而是2: + + +```python +12 / 5 +``` + + + + + 2 + + + +幂指数: + + +```python +2 ** 5 +``` + + + + + 32 + + + +取余: + + +```python +32 % 5 +``` + + + + + 2 + + + +赋值给变量: + + +```python +a = 1 +a +``` + + + + + 1 + + + +使用`type()`函数来查看变量类型: + + +```python 
+type(a) +``` + + + + + int + + + +整型数字的最大最小值: + +在 32 位系统中,一个整型 4 个字节,最小值 `-2,147,483,648`,最大值 `2,147,483,647`。 + +在 64 位系统中,一个整型 8 个字节,最小值 `-9,223,372,036,854,775,808`,最大值 `9,223,372,036,854,775,807`。 + + +```python +import sys +sys.maxint +``` + + + + + 2147483647 + + + +## 长整型 Long Integers + +当整型超出范围时,**Python**会自动将整型转化为长整型,不过长整型计算速度会比整型慢。 + + +```python +a = sys.maxint + 1 +print type(a) +``` + + <type 'long'> + + +长整型的一个标志是后面以字母L结尾: + + +```python +a +``` + + + + + 2147483648L + + + +可以在赋值时强制让类型为长整型: + + +```python +b = 1234L +type(b) +``` + + + + + long + + + +长整型可以与整型在一起进行计算,返回的类型还是长整型: + + +```python +a - 4 +``` + + + + + 2147483644L + + + +## 浮点数 Floating Point Numbers + + +```python +a = 1.4 +type(a) +``` + + + + + float + + + +在之前的除法例子`12 / 5`中,假如想要使返回的结果为2.4,可以将它们写成浮点数的形式: + + +```python +12.0 / 5.0 +``` + + + + + 2.4 + + + + +```python +12 / 5.0 +``` + + + + + 2.4 + + + + +```python +12.0 / 5 +``` + + + + + 2.4 + + + +上面的例子说明,浮点数与整数进行运算时,返回的仍然是浮点数: + + +```python +5 + 2.4 +``` + + + + + 7.4 + + + +浮点数也可以进行与整数相似的运算,甚至可以取余: + + +```python +3.4 - 3.2 +``` + + + + + 0.19999999999999973 + + + + +```python +12.3 + 32.4 +``` + + + + + 44.7 + + + + +```python +2.5 ** 2 +``` + + + + + 6.25 + + + + +```python +3.4 % 2.1 +``` + + + + + 1.2999999999999998 + + + +**Python**的浮点数标准与**C**,**Java**一致,都是[IEEE 754 floating point standard](http://en.wikipedia.org/wiki/IEEE_floating_point)。 + +注意看 `3.4 - 3.2` 的结果并不是我们预期的`0.2`,这是因为浮点数本身储存方式引起的,浮点数本身会存在一点误差。 + +事实上,**Python** 中储存的值为'0.199999999999999733546474089962430298328399658203125',这是 `3.4` 与 `3.2` 在内存中的近似值相减后得到的浮点数。 + + +```python +'{:.52}'.format(3.4 - 3.2) +``` + + + + + '0.199999999999999733546474089962430298328399658203125' + + + +当我们使用`print`显示时,**Python**会自动校正这个结果 + + +```python +print 3.4 - 3.2 +``` + + 0.2 + + +可以用`sys.float_info`来查看浮点数的信息: + + +```python +import sys +sys.float_info +``` + + + + + sys.float_info(max=1.7976931348623157e+308, max_exp=1024, max_10_exp=308, min=2.2250738585072014e-308, min_exp=-1021, min_10_exp=-307, dig=15, 
mant_dig=53, epsilon=2.220446049250313e-16, radix=2, rounds=1) + + + +例如浮点数能表示的最大值: + + +```python +sys.float_info.max +``` + + + + + 1.7976931348623157e+308 + + + +浮点数能表示的最小正规格化值: + + +```python +sys.float_info.min +``` + + + + + 2.2250738585072014e-308 + + + +浮点数的精度: + + +```python +sys.float_info.epsilon +``` + + + + + 2.220446049250313e-16 + + + +## 复数 Complex Numbers + +**Python** 使用 `j` 来表示复数的虚部: + + +```python +a = 1 + 2j +type(a) +``` + + + + + complex + + + +可以查看它的实部,虚部以及共轭: + + +```python +a.real +``` + + + + + 1.0 + + + + +```python +a.imag +``` + + + + + 2.0 + + + + +```python +a.conjugate() +``` + + + + + (1-2j) + + + +## 交互计算 + +可以将复杂的表达式放在一起计算: + + +```python +1 + 2 - (3 * 4 / 6) ** 5 + 7 % 5 +``` + + + + + -27 + + + +在**Python**中运算是有优先级的,优先级即算术的先后顺序,比如“先乘除后加减”和“先算括号里面的”都是两种优先级的规则,优先级从高到低排列如下: + +- `( )` 括号 +- `**` 幂指数运算 +- `* / // %` 乘,除,整数除法,取余运算 +- `+ -` 加,减运算 + +整数除法,返回的是不大于结果的最大整数值: + + +```python +12.3 // 5.2 +``` + + + + + 2.0 + + + + +```python +12.3 // -4 +``` + + + + + -4.0 + + + +## 简单的数学函数 + +绝对值: + + +```python +abs(-12.4) +``` + + + + + 12.4 + + + +取整: + + +```python +round(21.6) +``` + + + + + 22.0 + + + +最大最小值: + + +```python +print min(2, 3, 4, 5) +print max(2, 4, 3) +``` + + 2 + 4 + + +## 变量名覆盖 + +不要用内置函数的名字来命名变量,否则会出现意想不到的结果: + + +```python +type(max) +``` + + + + + builtin_function_or_method + + + +不要这样做!!! 
+ + +```python +max = 1 +type(max) +``` + + + + + int + + + + +```python +max(4, 5) +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + ----> 1 max(4, 5) + + + TypeError: 'int' object is not callable + + +## 类型转换 + +浮点数转整型,只保留整数部分: + + +```python +print int(12.324) +print int(-3.32) +``` + + 12 + -3 + + +整型转浮点型: + + +```python +print float(1.2) +``` + + 1.2 + + +## 其他表示 + +除了10进制外,整数还有其他类型的表示方法。 + +科学计数法: + + +```python +1e-6 +``` + + + + + 1e-06 + + + +16进制,前面加`0x`修饰,后面使用数字0-9A-F: + + +```python +0xFF +``` + + + + + 255 + + + +8进制,前面加`0`或者`0o`修饰,后面使用数字0-7: + + +```python +067 +``` + + + + + 55 + + + +2进制,前面加`0b`修饰,后面使用数字0或1: + + +```python +0b101010 +``` + + + + + 42 + + + +## 原地计算 In-place + +**Python**可以使用下面的形式进行原地计算: + + +```python +b = 2.5 +b += 2 +print b +b *= 2 +print b +b -= 3 +print b +``` + + 4.5 + 9.0 + 6.0 + + +## 布尔型 Boolean Data Type + +布尔型可以看成特殊的二值变量,其取值为`True`和`False`: + + +```python +q = True +type(q) +``` + + + + + bool + + + +可以用表达式构建布尔型变量: + + +```python +q = 1 > 2 +print q +``` + + False + + +常用的比较符号包括: + + <, >, <=, >=, ==, != + +**Python**支持链式比较: + + +```python +x = 2 +1 < x <= 3 +``` + + + + + True + + diff --git a/docs/02-python-essentials/02.04-strings.md b/docs/02-python-essentials/02.04-strings.md new file mode 100644 index 00000000..2325644f --- /dev/null +++ b/docs/02-python-essentials/02.04-strings.md @@ -0,0 +1,650 @@ + +# 字符串 + +## 生成字符串 + +**Python**中可以使用一对单引号''或者双引号""生成字符串。 + + +```python +s = "hello, world" +print s +``` + + hello, world + + + +```python +s = 'hello world' +print s +``` + + hello world + + +## 简单操作 + +加法: + + +```python +s = 'hello ' + 'world' +s +``` + + + + + 'hello world' + + + +字符串与数字相乘: + + +```python +"echo" * 3 +``` + + + + + 'echoechoecho' + + + +字符串长度: + + +```python +len(s) +``` + + + + + 11 + + + +## 字符串方法 + +**Python**是一种面向对象的语言,面向对象的语言中一个必不可少的元素就是方法,而字符串是对象的一种,所以有很多可用的方法。 + +跟很多语言一样,**Python**使用以下形式来调用方法: 
+ + 对象.方法(参数) + +### 分割 + +s.split()将s按照空格(包括多个空格,制表符`\t`,换行符`\n`等)分割,并返回所有分割得到的字符串。 + + +```python +line = "1 2 3 4 5" +numbers = line.split() +print numbers +``` + + ['1', '2', '3', '4', '5'] + + +s.split(sep)以给定的sep为分隔符对s进行分割。 + + +```python +line = "1,2,3,4,5" +numbers = line.split(',') +print numbers +``` + + ['1', '2', '3', '4', '5'] + + +### 连接 + +与分割相反,s.join(str_sequence)的作用是以s为连接符将字符串序列str_sequence中的元素连接起来,并返回连接后得到的新字符串: + + +```python +s = ' ' +s.join(numbers) +``` + + + + + '1 2 3 4 5' + + + + +```python +s = ',' +s.join(numbers) +``` + + + + + '1,2,3,4,5' + + + +### 替换 + +s.replace(part1, part2)将字符串s中指定的部分part1替换成想要的部分part2,并返回新的字符串。 + + +```python +s = "hello world" +s.replace('world', 'python') +``` + + + + + 'hello python' + + + +此时,s的值并没有变化,替换方法只是生成了一个新的字符串。 + + +```python +s +``` + + + + + 'hello world' + + + +### 大小写转换 + +s.upper()方法返回一个将s中的字母全部大写的新字符串。 + +s.lower()方法返回一个将s中的字母全部小写的新字符串。 + + +```python +"hello world".upper() +``` + + + + + 'HELLO WORLD' + + + +这两种方法也不会改变原来s的值: + + +```python +s = "HELLO WORLD" +print s.lower() +print s +``` + + hello world + HELLO WORLD + + +### 去除多余空格 + +s.strip()返回一个将s两端的多余空格除去的新字符串。 + +s.lstrip()返回一个将s开头的多余空格除去的新字符串。 + +s.rstrip()返回一个将s结尾的多余空格除去的新字符串。 + + +```python +s = " hello world " +s.strip() +``` + + + + + 'hello world' + + + +s的值依然不会变化: + + +```python +s +``` + + + + + ' hello world ' + + + + +```python +s.lstrip() +``` + + + + + 'hello world ' + + + + +```python +s.rstrip() +``` + + + + + ' hello world' + + + +## 更多方法 + +可以使用dir函数查看所有可以使用的方法: + + +```python +dir(s) +``` + + + + + ['__add__', + '__class__', + '__contains__', + '__delattr__', + '__doc__', + '__eq__', + '__format__', + '__ge__', + '__getattribute__', + '__getitem__', + '__getnewargs__', + '__getslice__', + '__gt__', + '__hash__', + '__init__', + '__le__', + '__len__', + '__lt__', + '__mod__', + '__mul__', + '__ne__', + '__new__', + '__reduce__', + '__reduce_ex__', + '__repr__', + '__rmod__', + '__rmul__', + '__setattr__', + '__sizeof__', 
+ '__str__', + '__subclasshook__', + '_formatter_field_name_split', + '_formatter_parser', + 'capitalize', + 'center', + 'count', + 'decode', + 'encode', + 'endswith', + 'expandtabs', + 'find', + 'format', + 'index', + 'isalnum', + 'isalpha', + 'isdigit', + 'islower', + 'isspace', + 'istitle', + 'isupper', + 'join', + 'ljust', + 'lower', + 'lstrip', + 'partition', + 'replace', + 'rfind', + 'rindex', + 'rjust', + 'rpartition', + 'rsplit', + 'rstrip', + 'split', + 'splitlines', + 'startswith', + 'strip', + 'swapcase', + 'title', + 'translate', + 'upper', + 'zfill'] + + + +## 多行字符串 + +Python 用一对 `"""` 或者 `'''` 来生成多行字符串: + + +```python +a = """hello world. +it is a nice day.""" +print a +``` + + hello world. + it is a nice day. + + +在储存时,我们在两行字符间加上一个换行符 `'\n'` + + +```python +a +``` + + + + + 'hello world.\nit is a nice day.' + + + +## 使用 `()` 或者 `\` 来换行 + +当代码太长或者为了美观起见时,我们可以使用两种方法来将一行代码转为多行代码: + +* () +* \ + + +```python +a = ("hello, world. " + "it's a nice day. " + "my name is xxx") +a +``` + + + + + "hello, world. it's a nice day. my name is xxx" + + + + +```python +a = "hello, world. " \ + "it's a nice day. " \ + "my name is xxx" +a +``` + + + + + "hello, world. it's a nice day. 
my name is xxx" + + + +## 强制转换为字符串 + +* `str(ob)`强制将`ob`转化成字符串。 +* `repr(ob)`也是强制将`ob`转化成字符串。 + +不同点如下: + + +```python +str(1.1 + 2.2) +``` + + + + + '3.3' + + + + +```python +repr(1.1 + 2.2) +``` + + + + + '3.3000000000000003' + + + +## 整数与不同进制的字符串的转化 + +可以将整数按照不同进制转化为不同类型的字符串。 + +十六进制: + + +```python +hex(255) +``` + + + + + '0xff' + + + +八进制: + + +```python +oct(255) +``` + + + + + '0377' + + + +二进制: + + +```python +bin(255) +``` + + + + + '0b11111111' + + + +可以使用 `int` 将字符串转为整数: + + +```python +int('23') +``` + + + + + 23 + + + +还可以指定按照多少进制来进行转换,最后返回十进制表达的整数: + + +```python +int('FF', 16) +``` + + + + + 255 + + + + +```python +int('377', 8) +``` + + + + + 255 + + + + +```python +int('11111111', 2) +``` + + + + + 255 + + + +`float` 可以将字符串转换为浮点数: + + +```python +float('3.5') +``` + + + + + 3.5 + + + +## 格式化字符串 + +**Python**用字符串的`format()`方法来格式化字符串。 + +具体用法如下,字符串中花括号 `{}` 的部分会被format传入的参数替代,传入的值可以是字符串,也可以是数字或者别的对象。 + + +```python +'{} {} {}'.format('a', 'b', 'c') +``` + + + + + 'a b c' + + + +可以用数字指定传入参数的相对位置: + + +```python +'{2} {1} {0}'.format('a', 'b', 'c') +``` + + + + + 'c b a' + + + +还可以指定传入参数的名称: + + +```python +'{color} {n} {x}'.format(n=10, x=1.5, color='blue') +``` + + + + + 'blue 10 1.5' + + + +可以在一起混用: + + +```python +'{color} {0} {x} {1}'.format(10, 'foo', x = 1.5, color='blue') +``` + + + + + 'blue 10 1.5 foo' + + + +可以用`{:}`指定格式: + + +```python +from math import pi + +'{0:10} {1:10d} {2:10.2f}'.format('foo', 5, 2 * pi) +``` + + + + + 'foo 5 6.28' + + + +具体规则与C中相同。 + +也可以使用旧式的 `%` 方法进行格式化: + + +```python +s = "some numbers:" +x = 1.34 +y = 2 +# 用百分号隔开,括号括起来 +t = "%s %f, %d" % (s, x, y) +``` + + +```python +t +``` + + + + + 'some numbers: 1.340000, 2' + + diff --git a/docs/02-python-essentials/02.05-indexing-and-slicing.md b/docs/02-python-essentials/02.05-indexing-and-slicing.md new file mode 100644 index 00000000..08d3d25d --- /dev/null +++ b/docs/02-python-essentials/02.05-indexing-and-slicing.md @@ -0,0 +1,237 @@ + +# 索引和分片 + +## 索引 + 
+对于一个有序序列,可以通过索引的方法来访问对应位置的值。字符串便是一个有序序列的例子,**Python**使用 `[]` 来对有序序列进行索引。 + + +```python +s = "hello world" +s[0] +``` + + + + + 'h' + + + +**Python**中索引是从 `0` 开始的,所以索引 `0` 对应于序列的第 `1` 个元素。为了得到第 `5` 个元素,需要使用索引值 `4` 。 + + +```python +s[4] +``` + + + + + 'o' + + + +除了正向索引,**Python**还引入了负索引值的用法,即从后向前开始计数,例如,索引 `-2` 表示倒数第 `2` 个元素: + + +```python +s[-2] +``` + + + + + 'l' + + + +单个索引大于等于字符串的长度时,会报错: + + +```python +s[11] +``` + + + --------------------------------------------------------------------------- + + IndexError Traceback (most recent call last) + + in () + ----> 1 s[11] + + + IndexError: string index out of range + + +## 分片 + +分片用来从序列中提取出想要的子序列,其用法为: + + var[lower:upper:step] + +其范围包括 `lower` ,但不包括 `upper` ,即 `[lower, upper)`, `step` 表示取值间隔大小,如果没有默认为`1`。 + + +```python +s +``` + + + + + 'hello world' + + + + +```python +s[1:3] +``` + + + + + 'el' + + + +分片中包含的元素的个数为 `3-1=2` 。 + +也可以使用负索引来指定分片的范围: + + +```python +s[1:-2] +``` + + + + + 'ello wor' + + + +包括索引 `1` 但是不包括索引 `-2` 。 + +lower和upper可以省略,省略lower意味着从开头开始分片,省略upper意味着一直分片到结尾。 + + +```python +s[:3] +``` + + + + + 'hel' + + + + +```python +s[-3:] +``` + + + + + 'rld' + + + + +```python +s[:] +``` + + + + + 'hello world' + + + +每隔一个取一个值(步长为 `2`): + + +```python +s[::2] +``` + + + + + 'hlowrd' + + + +当step的值为负时,省略lower意味着从结尾开始分片,省略upper意味着一直分片到开头。 + + +```python +s[::-1] +``` + + + + + 'dlrow olleh' + + + +当给定的upper超出字符串的长度(注意:因为不包含upper,所以可以等于)时,Python并不会报错,不过只会计算到结尾。 + + +```python +s[:100] +``` + + + + + 'hello world' + + + +## 使用“0”作为索引开头的原因 + +### 使用`[low, up)`形式的原因 + +假设需要表示字符串 `hello` 中的内部子串 `el` : + +|方式|`[low, up)`|`(low, up]`|`(lower, upper)`|`[lower, upper]` +|--|--|--|--|--| +|表示|`[1,3)`|`(0,2]`|`(0,3)`|`[1,2]` +|序列长度|`up - low`|`up - low`|`up - low - 1`|`up - low + 1` + +对长度来说,前两种方式比较好,因为不需要烦人的加一减一。 + +现在只考虑前两种方法,假设要表示字符串`hello`中的从头开始的子串`hel`: + +|方式|`[low, up)`|`(low, up]` +|--|--|--| +|表示|`[0,3)`|`(-1,2]`| +|序列长度|`up - low`|`up - low`| + +第二种表示方法从`-1`开始,不是很好,所以选择使用第一种`[low, up)`的形式。 + +### 使用0-base的形式 + +> 
Just too beautiful to ignore. +----Guido van Rossum + +两种简单的情况: + +- 从头开始的n个元素; + - 使用0-base:`[0, n)` + - 使用1-base:`[1, n+1)` + +- 第`i+1`个元素到第`i+n`个元素。 + - 使用0-base:`[i, n+i)` + - 使用1-base:`[i+1, n+i+1)` + +1-base有个`+1`部分,所以不推荐。 + +综合这两种原因,**Python**使用0-base的方法来进行索引。 diff --git a/docs/02-python-essentials/02.06-lists.md b/docs/02-python-essentials/02.06-lists.md new file mode 100644 index 00000000..50562f91 --- /dev/null +++ b/docs/02-python-essentials/02.06-lists.md @@ -0,0 +1,567 @@ + +# 列表 + +在**Python**中,列表是一个有序的序列。 + +列表用一对 `[]` 生成,中间的元素用 `,` 隔开,其中的元素不需要是同一类型,同时列表的长度也不固定。 + + +```python +l = [1, 2.0, 'hello'] +print l +``` + + [1, 2.0, 'hello'] + + +空列表可以用 `[]` 或者 `list()` 生成: + + +```python +empty_list = [] +empty_list +``` + + + + + [] + + + + +```python +empty_list = list() +empty_list +``` + + + + + [] + + + +## 列表操作 + +与字符串类似,列表也支持以下的操作: + +### 长度 + +用 `len` 查看列表长度: + + +```python +len(l) +``` + + + + + 3 + + + +### 加法和乘法 + +列表加法,相当于将两个列表按顺序连接: + + +```python +a = [1, 2, 3] +b = [3.2, 'hello'] +a + b +``` + + + + + [1, 2, 3, 3.2, 'hello'] + + + +列表与整数相乘,相当于将列表重复相加: + + +```python +l * 2 +``` + + + + + [1, 2.0, 'hello', 1, 2.0, 'hello'] + + + +### 索引和分片 + +列表和字符串一样可以通过索引和分片来查看它的元素。 + +索引: + + +```python +a = [10, 11, 12, 13, 14] +a[0] +``` + + + + + 10 + + + +反向索引: + + +```python +a[-1] +``` + + + + + 14 + + + +分片: + + +```python +a[2:-1] +``` + + + + + [12, 13] + + + +与字符串不同的是,列表可以通过索引和分片来修改。 + +对于字符串,如果我们通过索引或者分片来修改,**Python**会报错: + + +```python +s = "hello world" +# 把开头的 h 改成大写 +s[0] = 'H' +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + 1 s = "hello world" + 2 # 把开头的 h 改成大写 + ----> 3 s[0] = 'H' + + + TypeError: 'str' object does not support item assignment + + +而这种操作对于列表来说是可以的: + + +```python +a = [10, 11, 12, 13, 14] +a[0] = 100 +print a +``` + + [100, 11, 12, 13, 14] + + +这种赋值也适用于分片,例如,将列表的第2,3两个元素换掉: + + +```python +a[1:3] = [1, 2] +a +``` + + + + + 
[100, 1, 2, 13, 14] + + + +事实上,对于连续的分片(即步长为 `1` ),**Python**采用的是整段替换的方法,两者的元素个数并不需要相同,例如,将 `[11,12]` 替换为 `[1,2,3,4]`: + + +```python +a = [10, 11, 12, 13, 14] +a[1:3] = [1, 2, 3, 4] +print a +``` + + [10, 1, 2, 3, 4, 13, 14] + + +这意味着,可以用这种方法来删除列表中一个连续的分片: + + +```python +a = [10, 1, 2, 11, 12] +print a[1:3] +a[1:3] = [] +print a +``` + + [1, 2] + [10, 11, 12] + + +对于不连续(间隔step不为1)的片段进行修改时,两者的元素数目必须一致: + + +```python +a = [10, 11, 12, 13, 14] +a[::2] = [1, 2, 3] +a +``` + + + + + [1, 11, 2, 13, 3] + + + +否则会报错: + + +```python +a[::2] = [] +``` + + + --------------------------------------------------------------------------- + + ValueError Traceback (most recent call last) + + in () + ----> 1 a[::2] = [] + + + ValueError: attempt to assign sequence of size 0 to extended slice of size 3 + + +### 删除元素 + +**Python**提供了删除列表中元素的方法 'del'。 + +删除列表中的第一个元素: + + +```python +a = [1002, 'a', 'b', 'c'] +del a[0] +print a +``` + + ['a', 'b', 'c'] + + +删除第2到最后一个元素: + + +```python +a = [1002, 'a', 'b', 'c'] +del a[1:] +a +``` + + + + + [1002] + + + +删除间隔的元素: + + +```python +a = ['a', 1, 'b', 2, 'c'] +del a[::2] +a +``` + + + + + [1, 2] + + + +### 测试从属关系 + +用 `in` 来看某个元素是否在某个序列(不仅仅是列表)中,用not in来判断是否不在某个序列中。 + + +```python +a = [10, 11, 12, 13, 14] +print 10 in a +print 10 not in a +``` + + True + False + + +也可以作用于字符串: + + +```python +s = 'hello world' +print 'he' in s +print 'world' not in s +``` + + True + False + + +列表中可以包含各种对象,甚至可以包含列表: + + +```python +a = [10, 'eleven', [12, 13]] +a[2] +``` + + + + + [12, 13] + + + +a[2]是列表,可以对它再进行索引: + + +```python +a[2][1] +``` + + + + + 13 + + + +## 列表方法 + +### 不改变列表的方法 + +#### 列表中某个元素个数count + +`l.count(ob)` 返回列表中元素 `ob` 出现的次数。 + + +```python +a = [11, 12, 13, 12, 11] +a.count(11) +``` + + + + + 2 + + + +#### 列表中某个元素位置index + +`l.index(ob)` 返回列表中元素 `ob` 第一次出现的索引位置,如果 `ob` 不在 `l` 中会报错。 + + +```python +a.index(12) +``` + + + + + 1 + + + +不存在的元素会报错: + + +```python +a.index(1) +``` + + + 
--------------------------------------------------------------------------- + + ValueError Traceback (most recent call last) + + in () + ----> 1 a.index(1) + + + ValueError: 1 is not in list + + +### 改变列表的方法 + +#### 向列表添加单个元素 + +`l.append(ob)` 将元素 `ob` 添加到列表 `l` 的最后。 + + +```python +a = [10, 11, 12] +a.append(11) +print a +``` + + [10, 11, 12, 11] + + +append每次只添加一个元素,并不会因为这个元素是序列而将其展开: + + +```python +a.append([11, 12]) +print a +``` + + [10, 11, 12, 11, [11, 12]] + + +#### 向列表添加序列 + +`l.extend(lst)` 将序列 `lst` 的元素依次添加到列表 `l` 的最后,作用相当于 `l += lst`。 + + +```python +a = [10, 11, 12, 11] +a.extend([1, 2]) +print a +``` + + [10, 11, 12, 11, 1, 2] + + +#### 插入元素 + +`l.insert(idx, ob)` 在索引 `idx` 处插入 `ob` ,之后的元素依次后移。 + + +```python +a = [10, 11, 12, 13, 11] +# 在索引 3 插入 'a' +a.insert(3, 'a') +print a +``` + + [10, 11, 12, 'a', 13, 11] + + +#### 移除元素 + +`l.remove(ob)` 会将列表中第一个出现的 `ob` 删除,如果 `ob` 不在 `l` 中会报错。 + + +```python +a = [10, 11, 12, 13, 11] +# 移除了第一个 11 +a.remove(11) +print a +``` + + [10, 12, 13, 11] + + +#### 弹出元素 + +`l.pop(idx)` 会将索引 `idx` 处的元素删除,并返回这个元素。 + + +```python +a = [10, 11, 12, 13, 11] +a.pop(2) +``` + + + + + 12 + + + +#### 排序 + +`l.sort()` 会将列表中的元素按照一定的规则排序: + + +```python +a = [10, 1, 11, 13, 11, 2] +a.sort() +print a +``` + + [1, 2, 10, 11, 11, 13] + + +如果不想改变原来列表中的值,可以使用 `sorted` 函数: + + +```python +a = [10, 1, 11, 13, 11, 2] +b = sorted(a) +print a +print b +``` + + [10, 1, 11, 13, 11, 2] + [1, 2, 10, 11, 11, 13] + + +#### 列表反向 + +`l.reverse()` 会将列表中的元素从后向前排列。 + + +```python +a = [1, 2, 3, 4, 5, 6] +a.reverse() +print a +``` + + [6, 5, 4, 3, 2, 1] + + +如果不想改变原来列表中的值,可以使用这样的方法: + + +```python +a = [1, 2, 3, 4, 5, 6] +b = a[::-1] +print a +print b +``` + + [1, 2, 3, 4, 5, 6] + [6, 5, 4, 3, 2, 1] + + +如果不清楚用法,可以查看帮助: + + +```python +a.sort? 
+``` diff --git a/docs/02-python-essentials/02.07-mutable-and-immutable-data-types.md b/docs/02-python-essentials/02.07-mutable-and-immutable-data-types.md new file mode 100644 index 00000000..d178e236 --- /dev/null +++ b/docs/02-python-essentials/02.07-mutable-and-immutable-data-types.md @@ -0,0 +1,167 @@ + +# 可变和不可变类型 + +## 列表是可变的(Mutable) + + +```python +a = [1,2,3,4] +a +``` + + + + + [1, 2, 3, 4] + + + +通过索引改变: + + +```python +a[0] = 100 +a +``` + + + + + [100, 2, 3, 4] + + + +通过方法改变: + + +```python +a.insert(3, 200) +a +``` + + + + + [100, 2, 3, 200, 4] + + + + +```python +a.sort() +a +``` + + + + + [2, 3, 4, 100, 200] + + + +## 字符串是不可变的(Immutable) + + +```python +s = "hello world" +s +``` + + + + + 'hello world' + + + +通过索引改变会报错: + + +```python +s[0] = 'z' +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + ----> 1 s[0] = 'z' + + + TypeError: 'str' object does not support item assignment + + +字符串方法只是返回一个新字符串,并不改变原来的值: + + +```python +print s.replace('world', 'Mars') +print s +``` + + hello Mars + hello world + + +如果想改变字符串的值,可以用重新赋值的方法: + + +```python +s = "hello world" +s = s.replace('world', 'Mars') +print s +``` + + hello Mars + + +或者用 `bytearray` 代替字符串: + + +```python +s = bytearray('abcde') +s[1:3] = '12' +s +``` + + + + + bytearray(b'a12de') + + + +数据类型分类: + +|可变数据类型|不可变数据类型| +|--|--| +|`list`, `dictionary`, `set`, `numpy array`, `user defined objects`|`integer`, `float`, `long`, `complex`, `string`, `tuple`, `frozenset` + +## 字符串不可变的原因 + +其一,列表可以通过以下的方法改变,而字符串不支持这样的变化。 + + +```python +a = [1, 2, 3, 4] +b = a +``` + +此时, `a` 和 `b` 指向同一块区域,改变 `b` 的值, `a` 也会同时改变: + + +```python +b[0] = 100 +a +``` + + + + + [100, 2, 3, 4] + + + +其二,是字符串与整数浮点数一样被认为是基本类型,而基本类型在Python中是不可变的。 diff --git a/docs/02-python-essentials/02.08-tuples.md b/docs/02-python-essentials/02.08-tuples.md new file mode 100644 index 00000000..5af07e43 --- /dev/null +++ 
b/docs/02-python-essentials/02.08-tuples.md @@ -0,0 +1,141 @@ + +# 元组 + +## 基本操作 + +与列表相似,元组`Tuple`也是个有序序列,但是元组是不可变的,用`()`生成。 + + +```python +t = (10, 11, 12, 13, 14) +t +``` + + + + + (10, 11, 12, 13, 14) + + + +可以索引,切片: + + +```python +t[0] +``` + + + + + 10 + + + + +```python +t[1:3] +``` + + + + + (11, 12) + + + +但是元组是不可变的: + + +```python +# 会报错 +t[0] = 1 +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + 1 # 会报错 + ----> 2 t[0] = 1 + + + TypeError: 'tuple' object does not support item assignment + + +## 单个元素的元组生成 + +由于`()`在表达式中被应用,只含有单个元素的元组容易和表达式混淆,所以采用下列方式定义只有一个元素的元组: + + +```python +a = (10,) +print a +print type(a) +``` + + (10,) + + + + +```python +a = (10) +print type(a) +``` + + + + +将列表转换为元组: + + +```python +a = [10, 11, 12, 13, 14] +tuple(a) +``` + + + + + (10, 11, 12, 13, 14) + + + +## 元组方法 + +由于元组是不可变的,所以只能有一些不可变的方法,例如计算元素个数 `count` 和元素位置 `index` ,用法与列表一样。 + + +```python +a.count(10) +``` + + + + + 1 + + + + +```python +a.index(12) +``` + + + + + 2 + + + +## 为什么需要元组 + +旧式字符串格式化中参数要用元组; + +在字典中当作键值; + +数据库的返回值…… diff --git a/docs/02-python-essentials/02.09-speed-comparison-between-list-&-tuple.md b/docs/02-python-essentials/02.09-speed-comparison-between-list-&-tuple.md new file mode 100644 index 00000000..cd71e410 --- /dev/null +++ b/docs/02-python-essentials/02.09-speed-comparison-between-list-&-tuple.md @@ -0,0 +1,75 @@ + +# 列表与元组的速度比较 + +IPython 中用 `magic` 命令 `%timeit` 来计时。 + +## 比较生成速度 + + +```python +%timeit [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] +``` + + 1000000 loops, best of 3: 456 ns per loop + + + +```python +%timeit (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25) +``` + + 10000000 loops, best of 3: 23 ns per loop + + +可以看到,元组的生成速度要比列表的生成速度快得多,相差大概一个数量级。 + +## 比较遍历速度 + +产生内容相同的随机列表和元组: + + +```python +from numpy.random import rand +values = rand(10000,4) +lst = [list(row) for row in values] +tup = 
tuple(tuple(row) for row in values) +``` + + +```python + %timeit for row in lst: list(row) +``` + + 100 loops, best of 3: 4.12 ms per loop + + + +```python +%timeit for row in tup: tuple(row) +``` + + 100 loops, best of 3: 2.07 ms per loop + + +在遍历上,元组和列表的速度表现差不多。 + +## 比较遍历和索引速度: + + +```python +%timeit for row in lst: a = row[0] + 1 +``` + + The slowest run took 12.20 times longer than the fastest. This could mean that an intermediate result is being cached + 100 loops, best of 3: 3.73 ms per loop + + + +```python +%timeit for row in tup: a = row[0] + 1 +``` + + 100 loops, best of 3: 3.82 ms per loop + + +元组的生成速度会比列表快很多,迭代速度快一点,索引速度差不多。 diff --git a/docs/02-python-essentials/02.10-dictionaries.md b/docs/02-python-essentials/02.10-dictionaries.md new file mode 100644 index 00000000..841e7d71 --- /dev/null +++ b/docs/02-python-essentials/02.10-dictionaries.md @@ -0,0 +1,583 @@ + +# 字典 + +字典 `dictionary` ,在一些编程语言中也称为 `hash` , `map` ,是一种由键值对组成的数据结构。 + +顾名思义,我们把键想象成字典中的单词,值想象成词对应的定义,那么—— + +一个词可以对应一个或者多个定义,但是这些定义只能通过这个词来进行查询。 + +## 基本操作 + +### 空字典 + +**Python** 使用 `{}` 或者 `dict()` 来创建一个空的字典: + + +```python +a = {} +type(a) +``` + + + + + dict + + + + +```python +a = dict() +type(a) +``` + + + + + dict + + + +有了dict之后,可以用索引键值的方法向其中添加元素,也可以通过索引来查看元素的值: + +### 插入键值 + + +```python +a["one"] = "this is number 1" +a["two"] = "this is number 2" +a +``` + + + + + {'one': 'this is number 1', 'two': 'this is number 2'} + + + +### 查看键值 + + +```python +a['one'] +``` + + + + + 'this is number 1' + + + +### 更新键值 + + +```python +a["one"] = "this is number 1, too" +a +``` + + + + + {'one': 'this is number 1, too', 'two': 'this is number 2'} + + + +### 初始化字典 + +可以看到,Python使用`key: value`这样的结构来表示字典中的元素结构,事实上,可以直接使用这样的结构来初始化一个字典: + + +```python +b = {'one': 'this is number 1', 'two': 'this is number 2'} +b['one'] +``` + + + + + 'this is number 1' + + + +### 字典没有顺序 + +当我们 `print` 一个字典时,**Python**并不一定按照插入键值的先后顺序进行显示,因为字典中的键本身不一定是有序的。 + + +```python +print a +``` + + {'two': 'this is 
number 2', 'one': 'this is number 1, too'} + + + +```python +print b +``` + + {'two': 'this is number 2', 'one': 'this is number 1'} + + +因此,**Python**中不能用支持用数字索引按顺序查看字典中的值,而且数字本身也有可能成为键值,这样会引起混淆: + + +```python +# 会报错 +a[0] +``` + + + --------------------------------------------------------------------------- + + KeyError Traceback (most recent call last) + + in () + 1 # 会报错 + ----> 2 a[0] + + + KeyError: 0 + + +### 键必须是不可变的类型 + +出于hash的目的,Python中要求这些键值对的**键**必须是**不可变**的,而值可以是任意的Python对象。 + +一个表示近义词的字典: + + +```python +synonyms = {} +synonyms['mutable'] = ['changeable', 'variable', 'varying', 'fluctuating', + 'shifting', 'inconsistent', 'unpredictable', 'inconstant', + 'fickle', 'uneven', 'unstable', 'protean'] +synonyms['immutable'] = ['fixed', 'set', 'rigid', 'inflexible', + 'permanent', 'established', 'carved in stone'] +synonyms +``` + + + + + {'immutable': ['fixed', + 'set', + 'rigid', + 'inflexible', + 'permanent', + 'established', + 'carved in stone'], + 'mutable': ['changeable', + 'variable', + 'varying', + 'fluctuating', + 'shifting', + 'inconsistent', + 'unpredictable', + 'inconstant', + 'fickle', + 'uneven', + 'unstable', + 'protean']} + + + +另一个例子: + + +```python +# 定义四个字典 +e1 = {'mag': 0.05, 'width': 20} +e2 = {'mag': 0.04, 'width': 25} +e3 = {'mag': 0.05, 'width': 80} +e4 = {'mag': 0.03, 'width': 30} +# 以字典作为值传入新的字典 +events = {500: e1, 760: e2, 3001: e3, 4180: e4} +events +``` + + + + + {500: {'mag': 0.05, 'width': 20}, + 760: {'mag': 0.04, 'width': 25}, + 3001: {'mag': 0.05, 'width': 80}, + 4180: {'mag': 0.03, 'width': 30}} + + + +键(或者值)的数据类型可以不同: + + +```python +people = [ + {'first': 'Sam', 'last': 'Malone', 'name': 35}, + {'first': 'Woody', 'last': 'Boyd', 'name': 21}, + {'first': 'Norm', 'last': 'Peterson', 'name': 34}, + {'first': 'Diane', 'last': 'Chambers', 'name': 33} +] +people +``` + + + + + [{'first': 'Sam', 'last': 'Malone', 'name': 35}, + {'first': 'Woody', 'last': 'Boyd', 'name': 21}, + {'first': 'Norm', 'last': 'Peterson', 'name': 
34}, + {'first': 'Diane', 'last': 'Chambers', 'name': 33}] + + + +### 使用 dict 初始化字典 + +除了通常的定义方式,还可以通过 `dict()` 转化来生成字典: + + +```python +inventory = dict( + [('foozelator', 123), + ('frombicator', 18), + ('spatzleblock', 34), + ('snitzelhogen', 23) + ]) +inventory +``` + + + + + {'foozelator': 123, 'frombicator': 18, 'snitzelhogen': 23, 'spatzleblock': 34} + + + +利用索引直接更新键值对: + + +```python +inventory['frombicator'] += 1 +inventory +``` + + + + + {'foozelator': 123, 'frombicator': 19, 'snitzelhogen': 23, 'spatzleblock': 34} + + + +## 适合做键的类型 + +在不可变类型中,整数和字符串是字典中最常用的类型;而浮点数通常不推荐用来做键,原因如下: + + +```python +data = {} +data[1.1 + 2.2] = 6.6 +# 会报错 +data[3.3] +``` + + + --------------------------------------------------------------------------- + + KeyError Traceback (most recent call last) + + in () + 2 data[1.1 + 2.2] = 6.6 + 3 # 会报错 + ----> 4 data[3.3] + + + KeyError: 3.3 + + +事实上,观察`data`的值就会发现,这个错误是由浮点数的精度问题所引起的: + + +```python +data +``` + + + + + {3.3000000000000003: 6.6} + + + +有时候,也可以使用元组作为键值,例如,可以用元组做键来表示从第一个城市飞往第二个城市航班数的多少: + + +```python +connections = {} +connections[('New York', 'Seattle')] = 100 +connections[('Austin', 'New York')] = 200 +connections[('New York', 'Austin')] = 400 +``` + +元组是有序的,因此 `('New York', 'Austin')` 和 `('Austin', 'New York')` 是两个不同的键: + + +```python +print connections[('Austin', 'New York')] +print connections[('New York', 'Austin')] +``` + + 200 + 400 + + +## 字典方法 + +### `get` 方法 + +之前已经见过,用索引可以找到一个键对应的值,但是当字典中没有这个键的时候,Python会报错,这时候可以使用字典的 `get` 方法来处理这种情况,其用法如下: + + `d.get(key, default = None)` + +返回字典中键 `key` 对应的值,如果没有这个键,返回 `default` 指定的值(默认是 `None` )。 + + +```python +a = {} +a["one"] = "this is number 1" +a["two"] = "this is number 2" +``` + +索引不存在的键值会报错: + + +```python +a["three"] +``` + + + --------------------------------------------------------------------------- + + KeyError Traceback (most recent call last) + + in () + ----> 1 a["three"] + + + KeyError: 'three' + + +改用get方法: + + +```python +print a.get("three") +``` + + 
None + + +指定默认值参数: + + +```python +a.get("three", "undefined") +``` + + + + + 'undefined' + + + +### `pop` 方法删除元素 + +`pop` 方法可以用来弹出字典中某个键对应的值,同时也可以指定默认参数: + + `d.pop(key, default = None)` + +删除并返回字典中键 `key` 对应的值,如果没有这个键,返回 `default` 指定的值(默认是 `None` )。 + + +```python +a +``` + + + + + {'one': 'this is number 1', 'two': 'this is number 2'} + + + +弹出并返回值: + + +```python +a.pop("two") +``` + + + + + 'this is number 2' + + + + +```python +a +``` + + + + + {'one': 'this is number 1'} + + + +弹出不存在的键值: + + +```python +a.pop("two", 'not exist') +``` + + + + + 'not exist' + + + +与列表一样,`del` 函数可以用来删除字典中特定的键值对,例如: + + +```python +del a["one"] +a +``` + + + + + {} + + + +### `update`方法更新字典 + +之前已经知道,可以通过索引来插入、修改单个键值对,但是如果想对多个键值对进行操作,这种方法就显得比较麻烦,好在有 `update` 方法: + + `d.update(newd)` + +将字典`newd`中的内容更新到`d`中去。 + + +```python +person = {} +person['first'] = "Jmes" +person['last'] = "Maxwell" +person['born'] = 1831 +print person +``` + + {'born': 1831, 'last': 'Maxwell', 'first': 'Jmes'} + + +把'first'改成'James',同时插入'middle'的值'Clerk': + + +```python +person_modifications = {'first': 'James', 'middle': 'Clerk'} +person.update(person_modifications) +print person +``` + + {'middle': 'Clerk', 'born': 1831, 'last': 'Maxwell', 'first': 'James'} + + +### `in`查询字典中是否有该键 + + +```python +barn = {'cows': 1, 'dogs': 5, 'cats': 3} +``` + +`in` 可以用来判断字典中是否有某个特定的键: + + +```python +'chickens' in barn +``` + + + + + False + + + + +```python +'cows' in barn +``` + + + + + True + + + +### `keys` 方法,`values` 方法和`items` 方法 + + `d.keys()` + +返回一个由所有键组成的列表; + + `d.values()` + +返回一个由所有值组成的列表; + + `d.items()` + +返回一个由所有键值对元组组成的列表; + + +```python +barn.keys() +``` + + + + + ['cows', 'cats', 'dogs'] + + + + +```python +barn.values() +``` + + + + + [1, 3, 5] + + + + +```python +barn.items() +``` + + + + + [('cows', 1), ('cats', 3), ('dogs', 5)] + + diff --git a/docs/02-python-essentials/02.11-sets.md b/docs/02-python-essentials/02.11-sets.md new file mode 100644 index 00000000..76a326b5 --- /dev/null +++ 
b/docs/02-python-essentials/02.11-sets.md @@ -0,0 +1,549 @@ + +# 集合 + +之前看到的列表和字符串都是一种有序序列,而集合 `set` 是一种无序的序列。 + +因为集合是无序的,所以当集合中存在两个同样的元素的时候,Python只会保存其中的一个(唯一性);同时为了确保其中不包含同样的元素,集合中放入的元素只能是不可变的对象(确定性)。 + +## 集合生成 + +可以用`set()`函数来显示的生成空集合: + + +```python +a = set() +type(a) +``` + + + + + set + + + +也可以使用一个列表来初始化一个集合: + + +```python +a = set([1, 2, 3, 1]) +a +``` + + + + + {1, 2, 3} + + + +集合会自动去除重复元素 `1`。 + +可以看到,集合中的元素是用大括号`{}`包含起来的,这意味着可以用`{}`的形式来创建集合: + + +```python +a = {1, 2, 3, 1} +a +``` + + + + + {1, 2, 3} + + + +但是创建空集合的时候只能用`set`来创建,因为在Python中`{}`创建的是一个空的字典: + + +```python +s = {} +type(s) +``` + + + + + dict + + + +## 集合操作 + +假设有这样两个集合: + + +```python +a = {1, 2, 3, 4} +b = {3, 4, 5, 6} +``` + +### 并 + +两个集合的并,返回包含两个集合所有元素的集合(去除重复)。 + +可以用方法 `a.union(b)` 或者操作 `a | b` 实现。 + + +```python +a.union(b) +``` + + + + + {1, 2, 3, 4, 5, 6} + + + + +```python +b.union(a) +``` + + + + + {1, 2, 3, 4, 5, 6} + + + + +```python +a | b +``` + + + + + {1, 2, 3, 4, 5, 6} + + + +### 交 + +两个集合的交,返回包含两个集合共有元素的集合。 + +可以用方法 `a.intersection(b)` 或者操作 `a & b` 实现。 + + +```python +a.intersection(b) +``` + + + + + {3, 4} + + + + +```python +b.intersection(a) +``` + + + + + {3, 4} + + + + +```python +a & b +``` + + + + + {3, 4} + + + + +```python +print(a & b) +``` + + set([3, 4]) + + +注意:一般使用print打印set的结果与表示方法并不一致。 + +### 差 + +`a` 和 `b` 的差集,返回只在 `a` 不在 `b` 的元素组成的集合。 + +可以用方法 `a.difference(b)` 或者操作 `a - b` 实现。 + + +```python +a.difference(b) +``` + + + + + {1, 2} + + + + +```python +a - b +``` + + + + + {1, 2} + + + +注意,`a - b` 与 `b - a`并不一样,`b - a` 返回的是返回 b 不在 a 的元素组成的集合: + + +```python +b.difference(a) +``` + + + + + {5, 6} + + + + +```python +b - a +``` + + + + + {5, 6} + + + +### 对称差 + +`a` 和`b` 的对称差集,返回在 `a` 或在 `b` 中,但是不同时在 `a` 和 `b` 中的元素组成的集合。 + +可以用方法 `a.symmetric_difference(b)` 或者操作 `a ^ b` 实现(异或操作符)。 + + +```python +a.symmetric_difference(b) +``` + + + + + {1, 2, 5, 6} + + + + +```python +b.symmetric_difference(a) +``` + + + + + {1, 2, 5, 6} + + + + +```python +a ^ b +``` 
+ + + + + {1, 2, 5, 6} + + + +### 包含关系 + +假设现在有这样两个集合: + + +```python +a = {1, 2, 3} +b = {1, 2} +``` + +要判断 `b` 是不是 `a` 的子集,可以用 `b.issubset(a)` 方法,或者更简单的用操作 `b <= a` : + + +```python +b.issubset(a) +``` + + + + + True + + + + +```python +b <= a +``` + + + + + True + + + +与之对应,也可以用 `a.issuperset(b)` 或者 `a >= b` 来判断: + + +```python +a.issuperset(b) +``` + + + + + True + + + + +```python +a >= b +``` + + + + + True + + + +方法只能用来测试子集,但是操作符可以用来判断真子集: + + +```python +a <= a +``` + + + + + True + + + +自己不是自己的真子集: + + +```python +a < a +``` + + + + + False + + + +## 集合方法 + +### `add` 方法向集合添加单个元素 + +跟列表的 `append` 方法类似,用来向集合添加单个元素。 + + s.add(a) + +将元素 `a` 加入集合 `s` 中。 + + +```python +t = {1, 2, 3} +t.add(5) +t +``` + + + + + {1, 2, 3, 5} + + + +如果添加的是已有元素,集合不改变: + + +```python +t.add(3) +t +``` + + + + + {1, 2, 3, 5} + + + +### `update` 方法向集合添加多个元素 + +跟列表的`extend`方法类似,用来向集合添加多个元素。 + + s.update(seq) + +将`seq`中的元素添加到`s`中。 + + +```python +t.update([5, 6, 7]) +t +``` + + + + + {1, 2, 3, 5, 6, 7} + + + +### `remove` 方法移除单个元素 + + s.remove(ob) + +从集合`s`中移除元素`ob`,如果不存在会报错。 + + +```python +t.remove(1) +t +``` + + + + + {2, 3, 5, 6, 7} + + + + +```python +t.remove(10) +``` + + + --------------------------------------------------------------------------- + + KeyError Traceback (most recent call last) + + in () + ----> 1 t.remove(10) + + + KeyError: 10 + + +### pop方法弹出元素 + +由于集合没有顺序,不能像列表一样按照位置弹出元素,所以`pop` 方法删除并返回集合中任意一个元素,如果集合中没有元素会报错。 + + +```python +t.pop() +``` + + + + + {3, 5, 6, 7} + + + + +```python +print t +``` + + set([3, 5, 6, 7]) + + + +```python +s = set() +# 报错 +s.pop() +``` + + + --------------------------------------------------------------------------- + + KeyError Traceback (most recent call last) + + in () + 1 s = set() + 2 # 报错 + ----> 3 s.pop() + + + KeyError: 'pop from an empty set' + + +### discard 方法 + +作用与 `remove` 一样,但是当元素在集合中不存在的时候不会报错。 + + +```python +t.discard(3) +``` + + +```python +t +``` + + + + + {5, 6, 7} + + + +不存在的元素不会报错: + + +```python +t.discard(20) 
+``` + + +```python +t +``` + + + + + {5, 6, 7} + + + +### difference_update方法 + + a.difference_update(b) + +从a中去除所有属于b的元素: diff --git a/docs/02-python-essentials/02.12-frozen-sets.md b/docs/02-python-essentials/02.12-frozen-sets.md new file mode 100644 index 00000000..7adfaf11 --- /dev/null +++ b/docs/02-python-essentials/02.12-frozen-sets.md @@ -0,0 +1,68 @@ + +# 不可变集合 + +对应于元组(`tuple`)与列表(`list`)的关系,对于集合(`set`),**Python**提供了一种叫做不可变集合(`frozen set`)的数据结构。 + +使用 `frozenset` 来进行创建: + + +```python +s = frozenset([1, 2, 3, 'a', 1]) +s +``` + + + + + frozenset({1, 2, 3, 'a'}) + + + +与集合不同的是,不可变集合一旦创建就不可以改变。 + +不可变集合的一个主要应用是用来作为字典的键,例如用一个字典来记录两个城市之间的距离: + + +```python +flight_distance = {} +city_pair = frozenset(['Los Angeles', 'New York']) +flight_distance[city_pair] = 2498 +flight_distance[frozenset(['Austin', 'Los Angeles'])] = 1233 +flight_distance[frozenset(['Austin', 'New York'])] = 1515 +flight_distance +``` + + + + + {frozenset({'Austin', 'New York'}): 1515, + frozenset({'Austin', 'Los Angeles'}): 1233, + frozenset({'Los Angeles', 'New York'}): 2498} + + + +由于集合不分顺序,所以不同顺序不会影响查阅结果: + + +```python +flight_distance[frozenset(['New York','Austin'])] +``` + + + + + 1515 + + + + +```python +flight_distance[frozenset(['Austin','New York'])] +``` + + + + + 1515 + + diff --git a/docs/02-python-essentials/02.13-how-python-assignment-works.md b/docs/02-python-essentials/02.13-how-python-assignment-works.md new file mode 100644 index 00000000..d4473e98 --- /dev/null +++ b/docs/02-python-essentials/02.13-how-python-assignment-works.md @@ -0,0 +1,325 @@ + +# Python 赋值机制 + +先看一个例子: + + +```python +x = [1, 2, 3] +y = x +x[1] = 100 +print y +``` + + [1, 100, 3] + + +改变变量`x`的值,变量`y`的值也随着改变,这与**Python**内部的赋值机制有关。 + +## 简单类型 + +先来看这一段代码在**Python**中的执行过程。 + +```python +x = 500 +y = x +y = 'foo' +``` + +- `x = 500` + +**Python**分配了一个 `PyInt` 大小的内存 `pos1` 用来储存对象 `500` ,然后,Python在命名空间中让变量 `x` 指向了这一块内存,注意,整数是不可变类型,所以这块内存的内容是不可变的。 + +|内存|命名空间| +|---|---| +|`pos1 : PyInt(500)` (不可变)| `x 
: pos1` | + +- `y = x ` + +**Python**并没有使用新的内存来储存变量 `y` 的值,而是在命名空间中,让变量 `y` 与变量 `x` 指向了同一块内存空间。 + +|内存|命名空间| +|---|---| +|`pos1 : PyInt(500)` (不可变)|`x : pos1`
`y : pos1`| + +- `y = 'foo'` + +**Python**此时分配一个 `PyStr` 大小的内存 `pos2` 来储存对象 `'foo'` ,然后让变量 `y` 改为指向 `pos2` 。 + +|内存|命名空间| +|---|---| +|`pos1 : PyInt(500)` (不可变)
`pos2 : PyStr('foo')` (不可变)|`x : pos1`
`y : pos2`| + +对这一过程进行验证,可以使用 `id` 函数。 + + id(x) + +返回变量 `x` 的内存地址。 + + +```python +x = 500 +id(x) +``` + + + + + 48220272L + + + + +```python +y = x +id(y) +``` + + + + + 48220272L + + + +也可以使用 `is` 来判断两个变量是不是指向同一个对象: + + +```python +x is y +``` + + + + + True + + + +现在 `y` 指向另一块内存: + + +```python +y = 'foo' +id(y) +``` + + + + + 39148320L + + + + +```python +x is y +``` + + + + + False + + + +**Python**会为每个出现的对象分配内存,哪怕它们的值是一样的,例如: + + +```python +x = 500 +id(x) +``` + + + + + 48220296L + + + + +```python +y = 500 +id(y) +``` + + + + + 48220224L + + + + +```python +x is y +``` + + + + + False + + + +不过,为了提高内存利用效率,对于一些简单的对象,如一些数值较小的int对象,**Python**采用了重用对象内存的办法: + + +```python +x = 2 +id(x) +``` + + + + + 6579504L + + + + +```python +y = 2 +id(y) +``` + + + + + 6579504L + + + + +```python +x is y +``` + + + + + True + + + +## 容器类型 + +现在来看另一段代码: + +``` python +x = [500, 501, 502] +y = x +y[1] = 600 +y = [700, 800] +``` + +- `x = [500, 501, 502]` + +Python为3个PyInt分配内存 `pos1` , `pos2` , `pos3` (不可变),然后为列表分配一段内存 `pos4` ,它包含3个位置,分别指向这3个内存,最后再让变量 `x` 指向这个列表。 + +|内存|命名空间| +|---|---| +|`pos1 : PyInt(500)` (不可变)
`pos2 : PyInt(501)` (不可变)
`pos3 : PyInt(502)` (不可变)
`pos4 : PyList(pos1, pos2, pos3)` (可变)|`x : pos4`| + +- `y = x` + +并没有创建新的对象,只需要将 `y` 指向 `pos4` 即可。 + +|内存|命名空间| +|---|---| +|`pos1 : PyInt(500)` (不可变)
`pos2 : PyInt(501)` (不可变)
`pos3 : PyInt(502)` (不可变)
`pos4 : PyList(pos1, pos2, pos3)` (可变)|`x : pos4`
`y : pos4`| + +- `y[1] = 600` + +原来 `y[1]` 这个位置指向的是 `pos2` ,由于不能修改 `pos2` 的值,所以首先为 `600` 分配新内存 `pos5` 。 + +再把 `y[1]` 指向的位置修改为 `pos5` 。此时,由于 `pos2` 位置的对象已经不再被任何变量或容器引用,**Python**会自动调用垃圾回收机制将它回收。 + +|内存|命名空间| +|---|---| +|`pos1 : PyInt(500)` (不可变)
`pos2 :` 垃圾回收
`pos3 : PyInt(502)` (不可变)
`pos4 : PyList(pos1, pos5, pos3)` (可变)
`pos5 : PyInt(600)` (不可变) |`x : pos4`
`y : pos4`| + +- `y = [700, 800]` + +首先创建这个列表,然后将变量 `y` 指向它。 + +|内存|命名空间| +|---|---| +|`pos1 : PyInt(500)` (不可变)
`pos3 : PyInt(502)` (不可变)
`pos4 : PyList(pos1, pos5, pos3)` (可变)
`pos5 : PyInt(600)` (不可变)
`pos6 : PyInt(700)` (不可变)
`pos7 : PyInt(800)` (不可变)
`pos8 : PyList(pos6, pos7)` (可变)|`x : pos4`
`y : pos8`| + +对这一过程进行验证: + + +```python +x = [500, 501, 502] +print id(x[0]) +print id(x[1]) +print id(x[2]) +print id(x) +``` + + 48220224 + 48220248 + 48220200 + 54993032 + + +赋值,`id(y)` 与 `id(x)` 相同。 + + +```python +y = x +print id(y) +``` + + 54993032 + + + +```python +x is y +``` + + + + + True + + + +修改 `y[1]` ,`id(y)` 并不改变。 + + +```python +y[1] = 600 +print id(y) +``` + + 54993032 + + +`id(x[1])` 和 `id(y[1])` 的值改变了。 + + +```python +print id(x[1]) +print id(y[1]) +``` + + 48220272 + 48220272 + + +更改 `y` 的值,`id(y)` 的值改变 + + +```python +y = [700, 800] +print id(y) +print id(x) +``` + + 54995272 + 54993032 + diff --git a/docs/02-python-essentials/02.14-if-statement.md b/docs/02-python-essentials/02.14-if-statement.md new file mode 100644 index 00000000..325c554c --- /dev/null +++ b/docs/02-python-essentials/02.14-if-statement.md @@ -0,0 +1,194 @@ + +# 判断语句 + +## 基本用法 + +判断,基于一定的条件,决定是否要执行特定的一段代码,例如判断一个数是不是正数: + + +```python +x = 0.5 +if x > 0: + print "Hey!" + print "x is positive" +``` + + Hey! + x is positive + + +在这里,如果 `x > 0` 为 `False` ,那么程序将不会执行两条 `print` 语句。 + +虽然都是用 `if` 关键词定义判断,但与**C,Java**等语言不同,**Python**不使用 `{}` 将 `if` 语句控制的区域包含起来。**Python**使用的是缩进方法。同时,也不需要用 `()` 将判断条件括起来。 + +上面例子中的这两条语句: +```python + print "Hey!" + print "x is positive" +``` +就叫做一个代码块,同一个代码块使用同样的缩进值,它们组成了这条 `if` 语句的主体。 + +不同的缩进值表示不同的代码块,例如: + +`x > 0` 时: + + +```python +x = 0.5 +if x > 0: + print "Hey!" + print "x is positive" + print "This is still part of the block" +print "This isn't part of the block, and will always print." +``` + + Hey! + x is positive + This is still part of the block + This isn't part of the block, and will always print. + + +`x < 0` 时: + + +```python +x = -0.5 +if x > 0: + print "Hey!" + print "x is positive" + print "This is still part of the block" +print "This isn't part of the block, and will always print." +``` + + This isn't part of the block, and will always print. 
+ + +在这两个例子中,最后一句并不是`if`语句中的内容,所以不管条件满不满足,它都会被执行。 + +一个完整的 `if` 结构通常如下所示(注意:条件后的 `:` 是必须要的,缩进值需要一样): + + if <条件1>: + <语句1> + <语句2> + elif <条件2>: + <语句> + else: + <语句> + +当条件1被满足时,执行 `if` 下面的语句,当条件1不满足的时候,转到 `elif` ,看它的条件2满不满足,满足执行 `elif` 下面的语句,不满足则执行 `else` 下面的语句。 + +对于上面的例子进行扩展: + + +```python +x = 0 +if x > 0: + print "x is positive" +elif x == 0: + print "x is zero" +else: + print "x is negative" +``` + + x is zero + + +`elif` 的个数没有限制,可以是1个或者多个,也可以没有。 + +`else` 最多只有1个,也可以没有。 + +可以使用 `and` , `or` , `not` 等关键词结合多个判断条件: + + +```python +x = 10 +y = -5 +x > 0 and y < 0 +``` + + + + + True + + + + +```python +not x > 0 +``` + + + + + False + + + + +```python +x < 0 or y < 0 +``` + + + + + True + + + +再看一个简单的例子:假如想判断一个年份是不是闰年,按照闰年的定义,只需要判断这个年份是不是能被4整除但不能被100整除,或者正好能被400整除: + + +```python +year = 1900 +if year % 400 == 0: + print "This is a leap year!" +# 两个条件都满足才执行 +elif year % 4 == 0 and year % 100 != 0: + print "This is a leap year!" +else: + print "This is not a leap year." +``` + + This is not a leap year. + + +## 值的测试 + +**Python**不仅仅可以使用布尔型变量作为条件,它可以直接在`if`中使用任何表达式作为条件: + +大部分表达式的值都会被当作`True`,但以下表达式值会被当作`False`: + +- False +- None +- 0 +- 空字符串,空列表,空字典,空集合 + + +```python +mylist = [3, 1, 4, 1, 5, 9] +if mylist: + print "The first element is:", mylist[0] +else: + print "There is no first element." +``` + + The first element is: 3 + + +修改为空列表: + + +```python +mylist = [] +if mylist: + print "The first element is:", mylist[0] +else: + print "There is no first element." +``` + + There is no first element.
+ + +当然这种用法并不推荐,推荐使用 `if len(mylist) > 0:` 来判断一个列表是否为空。 diff --git a/docs/02-python-essentials/02.15-loops.md b/docs/02-python-essentials/02.15-loops.md new file mode 100644 index 00000000..50d0333a --- /dev/null +++ b/docs/02-python-essentials/02.15-loops.md @@ -0,0 +1,205 @@ + +# 循环 + +循环的作用在于将一段代码重复执行多次。 + +## while 循环 + + while <条件>: + <语句> + +**Python**会循环执行 `<语句>`,直到 `<条件>` 不满足为止。 + +例如,计算数字`0`到`999999`的和: + + +```python +i = 0 +total = 0 +while i < 1000000: + total += i + i += 1 +print total +``` + + 499999500000 + + +之前提到,空容器会被当成 `False` ,因此可以用 `while` 循环来读取容器中的所有元素: + + +```python +plays = set(['Hamlet', 'Macbeth', 'King Lear']) +while plays: + play = plays.pop() + print 'Perform', play +``` + + Perform King Lear + Perform Macbeth + Perform Hamlet + + +循环每次从 `plays` 中弹出一个元素,一直到 `plays` 为空为止。 + +## for 循环 + + for <变量> in <序列>: + <语句> + +`for` 循环会遍历完 `<序列>` 中所有元素为止。 + +上一个例子可以改写成如下形式: + + +```python +plays = set(['Hamlet', 'Macbeth', 'King Lear']) +for play in plays: + print 'Perform', play +``` + + Perform King Lear + Perform Macbeth + Perform Hamlet + + +使用 `for` 循环时,注意尽量不要改变 `plays` 的值,否则可能会产生意想不到的结果。 + +之前的求和也可以通过 `for` 循环来实现: + + +```python +total = 0 +for i in range(100000): + total += i +print total +``` + + 4999950000 + + +然而这种写法有一个缺点:在循环前,它会生成一个长度为 `100000` 的临时列表。 + +生成列表的问题在于,会有一定的时间和内存消耗,当数字从 `100000` 变得更大时,时间和内存的消耗会更加明显。 + +为了解决这个问题,我们可以使用 `xrange` 来代替 `range` 函数,其效果与`range`函数相同,但是 `xrange` 并不会一次性的产生所有的数据: + + +```python +total = 0 +for i in xrange(100000): + total += i +print total +``` + + 4999950000 + + +比较一下两者的运行时间: + + +```python +%timeit for i in xrange(1000000): i = i +``` + + 10 loops, best of 3: 40.7 ms per loop + + + +```python +%timeit for i in range(1000000): i = i +``` + + 10 loops, best of 3: 96.6 ms per loop + + +可以看出,`xrange` 用时要比 `range` 少。 + +## continue 语句 + +遇到 `continue` 的时候,程序会跳过本次循环中剩余的语句,回到循环的最开始,继续执行下一次循环。 + +例如在循环中忽略一些特定的值: + + +```python +values = [7, 6, 4, 7, 19, 2, 1] +for i in values: + if i % 2 != 0: + # 忽略奇数 + continue + print i/2 +``` + + 3 + 2 + 1 + + +## break 语句 + +遇到
`break` 的时候,程序会跳出循环,不管循环条件是不是满足: + + +```python +command_list = ['start', + 'process', + 'process', + 'process', + 'stop', + 'start', + 'process', + 'stop'] +while command_list: + command = command_list.pop(0) + if command == 'stop': + break + print(command) +``` + + start + process + process + process + + +在遇到第一个 `'stop'` 之后,程序跳出循环。 + +## else语句 + +与 `if` 一样, `while` 和 `for` 循环后面也可以跟着 `else` 语句,不过要和`break`一起连用。 + +- 当循环正常结束时,循环条件不满足, `else` 被执行; +- 当循环被 `break` 结束时,循环条件仍然满足, `else` 不执行。 + +不执行: + + +```python +values = [7, 6, 4, 7, 19, 2, 1] +for x in values: + if x <= 10: + print 'Found:', x + break +else: + print 'All values greater than 10' +``` + + Found: 7 + + +执行: + + +```python +values = [11, 12, 13, 100] +for x in values: + if x <= 10: + print 'Found:', x + break +else: + print 'All values greater than 10' +``` + + All values greater than 10 + diff --git a/docs/02-python-essentials/02.16-list-comprehension.md b/docs/02-python-essentials/02.16-list-comprehension.md new file mode 100644 index 00000000..c246b0b6 --- /dev/null +++ b/docs/02-python-essentials/02.16-list-comprehension.md @@ -0,0 +1,105 @@ + +# 列表推导式 + +循环可以用来生成列表: + + +```python +values = [10, 21, 4, 7, 12] +squares = [] +for x in values: + squares.append(x**2) +print squares +``` + + [100, 441, 16, 49, 144] + + +列表推导式可以使用更简单的方法来创建这个列表: + + +```python +values = [10, 21, 4, 7, 12] +squares = [x**2 for x in values] +print squares +``` + + [100, 441, 16, 49, 144] + + +还可以在列表推导式中加入条件进行筛选。 + +例如在上面的例子中,假如只想保留列表中不大于`10`的数的平方: + + +```python +values = [10, 21, 4, 7, 12] +squares = [x**2 for x in values if x <= 10] +print squares +``` + + [100, 16, 49] + + +也可以使用推导式生成集合和字典: + + +```python +square_set = {x**2 for x in values if x <= 10} +print(square_set) +square_dict = {x: x**2 for x in values if x <= 10} +print(square_dict) +``` + + set([16, 49, 100]) + {10: 100, 4: 16, 7: 49} + + +再如,计算上面例子中生成的列表中所有元素的和: + + +```python +total = sum([x**2 for x in values if x <= 10]) +print(total) +``` + + 165 + + 
+ + +但是,**Python**会生成这个列表,然后再将它放到垃圾回收机制中(因为没有变量指向它),这毫无疑问是种浪费。 + +为了解决这种问题,与xrange()类似,**Python**使用生成器表达式来解决这个问题: + + +```python +total = sum(x**2 for x in values if x <= 10) +print(total) +``` + + 165 + + +与上面相比,只是去掉了方括号,但这里并不会一次性的生成这个列表。 + +比较一下两者的用时: + + +```python +x = range(1000000) +``` + + +```python +%timeit total = sum([i**2 for i in x]) +``` + + 1 loops, best of 3: 3.86 s per loop + + + +```python +%timeit total = sum(i**2 for i in x) +``` + + 1 loops, best of 3: 2.58 s per loop + diff --git a/docs/02-python-essentials/02.17-functions.md b/docs/02-python-essentials/02.17-functions.md new file mode 100644 index 00000000..bb068fba --- /dev/null +++ b/docs/02-python-essentials/02.17-functions.md @@ -0,0 +1,365 @@ + +# 函数 + +## 定义函数 + +函数`function`,通常接受输入参数,并有返回值。 + +它负责完成某项特定任务,而且相较于其他代码,具备相对的独立性。 + + +```python +def add(x, y): + """Add two numbers""" + a = x + y + return a +``` + +函数通常有以下几个特征: +- 使用 `def` 关键词来定义一个函数。 +- `def` 后面是函数的名称,括号中是函数的参数,不同的参数用 `,` 隔开, `def foo():` 的形式是必须要有的,参数可以为空; +- 使用缩进来划分函数的内容; +- `docstring` 用 `"""` 包含的字符串,用来解释函数的用途,可省略; +- `return` 返回特定的值,如果省略,返回 `None` 。 + +## 使用函数 + +使用函数时,只需要将参数换成特定的值传给函数。 + +**Python**并没有限定参数的类型,因此可以使用不同的参数类型: + + +```python +print add(2, 3) +print add('foo', 'bar') +``` + + 5 + foobar + + +在这个例子中,如果传入的两个参数不可以相加,那么**Python**会报错: + + +```python +print add(2, "foo") +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + ----> 1 print add(2, "foo") + + + in add(x, y) + 1 def add(x, y): + 2 """Add two numbers""" + ----> 3 a = x + y + 4 return a + + + TypeError: unsupported operand type(s) for +: 'int' and 'str' + + +如果传入的参数数目与实际不符合,也会报错: + + +```python +print add(1, 2, 3) +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + ----> 1 print add(1, 2, 3) + + + TypeError: add() takes exactly 2 arguments (3 given) + + + +```python +print
add(1) +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + ----> 1 print add(1) + + + TypeError: add() takes exactly 2 arguments (1 given) + + +传入参数时,Python提供了两种选项,第一种是上面使用的按照位置传入参数,另一种则是使用关键词模式,显式地指定参数的值: + + +```python +print add(x=2, y=3) +print add(y="foo", x="bar") +``` + + 5 + barfoo + + +可以混合这两种模式: + + +```python +print add(2, y=3) +``` + + 5 + + +## 设定参数默认值 + +可以在函数定义的时候给参数设定默认值,例如: + + +```python +def quad(x, a=1, b=0, c=0): + return a*x**2 + b*x + c +``` + +可以省略有默认值的参数: + + +```python +print quad(2.0) +``` + + 4.0 + + +可以修改参数的默认值: + + +```python +print quad(2.0, b=3) +``` + + 10.0 + + + +```python +print quad(2.0, 2, c=4) +``` + + 12.0 + + +这里混合了位置和指定两种参数传入方式,第二个2是传给 `a` 的。 + +注意,在使用混合语法时,要注意不能给同一个值赋值多次,否则会报错,例如: + + +```python +print quad(2.0, 2, a=2) +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + ----> 1 print quad(2.0, 2, a=2) + + + TypeError: quad() got multiple values for keyword argument 'a' + + +## 接收不定参数 + +使用如下方法,可以使函数接受不定数目的参数: + + +```python +def add(x, *args): + total = x + for arg in args: + total += arg + return total +``` + +这里,`*args` 表示参数数目不定,可以看成一个元组,把第一个参数后面的参数当作元组中的元素。 + + +```python +print add(1, 2, 3, 4) +print add(1, 2) +``` + + 10 + 3 + + +这样定义的函数不能使用关键词传入参数,要使用关键词,可以这样: + + +```python +def add(x, **kwargs): + total = x + for arg, value in kwargs.items(): + print "adding ", arg + total += value + return total +``` + +这里, `**kwargs` 表示参数数目不定,相当于一个字典,关键词和值对应于键值对。 + + +```python +print add(10, y=11, z=12, w=13) +``` + + adding y + adding z + adding w + 46 + + +再看这个例子,可以接收任意数目的位置参数和键值对参数: + + +```python +def foo(*args, **kwargs): + print args, kwargs + +foo(2, 3, x='bar', z=10) +``` + + (2, 3) {'x': 'bar', 'z': 10} + + +不过要按顺序传入参数,先传入位置参数 `args` ,在传入关键词参数 `kwargs` 。 + +## 返回多个值 + +函数可以返回多个值: + + +```python +from math import atan2 + +def 
to_polar(x, y): + r = (x**2 + y**2) ** 0.5 + theta = atan2(y, x) + return r, theta + +r, theta = to_polar(3, 4) +print r, theta +``` + + 5.0 0.927295218002 + + +事实上,**Python**将返回的两个值变成了元组: + + +```python +print to_polar(3, 4) +``` + + (5.0, 0.9272952180016122) + + +因为这个元组中有两个值,所以可以使用 + + r, theta = to_polar(3, 4) + +给两个值赋值。 + +列表也有相似的功能: + + +```python +a, b, c = [1, 2, 3] +print a, b, c +``` + + 1 2 3 + + +事实上,不仅仅返回值可以用元组表示,也可以将参数用元组以这种方式传入: + + +```python +def add(x, y): + """Add two numbers""" + a = x + y + return a + +z = (2, 3) +print add(*z) +``` + + 5 + + +这里的`*`必不可少。 + +事实上,还可以通过字典传入参数来执行函数: + + +```python +def add(x, y): + """Add two numbers""" + a = x + y + return a + +w = {'x': 2, 'y': 3} +print add(**w) +``` + + 5 + + +## map 方法生成序列 + +可以通过 `map` 的方式利用函数来生成序列: + + +```python +def sqr(x): + return x ** 2 + +a = [2,3,4] +print map(sqr, a) +``` + + [4, 9, 16] + + +其用法为: + + map(aFun, aSeq) + +将函数 `aFun` 应用到序列 `aSeq` 上的每一个元素上,返回一个列表,不管这个序列原来是什么类型。 + +事实上,根据函数参数的多少,`map` 可以接受多组序列,将其对应的元素作为参数传入函数: + + +```python +def add(x, y): + return x + y + +a = (2,3,4) +b = [10,5,3] +print map(add,a,b) +``` + + [12, 8, 7] + diff --git a/docs/02-python-essentials/02.18-modules-and-packages.md b/docs/02-python-essentials/02.18-modules-and-packages.md new file mode 100644 index 00000000..746f0fcb --- /dev/null +++ b/docs/02-python-essentials/02.18-modules-and-packages.md @@ -0,0 +1,311 @@ + +# 模块和包 + +## 模块 + +Python会将所有 `.py` 结尾的文件认定为Python代码文件,考虑下面的脚本 `ex1.py` : + + +```python +%%writefile ex1.py + +PI = 3.1416 + +def sum(lst): + tot = lst[0] + for value in lst[1:]: + tot = tot + value + return tot + +w = [0, 1, 2, 3] +print sum(w), PI +``` + + Overwriting ex1.py + + +可以执行它: + + +```python +%run ex1.py +``` + + 6 3.1416 + + +这个脚本可以当作一个模块,可以使用`import`关键词加载并执行它(这里要求`ex1.py`在当前工作目录): + + +```python +import ex1 +``` + + 6 3.1416 + + + +```python +ex1 +``` + + + + + + + + +在导入时,**Python**会执行一遍模块中的所有内容。 + +`ex1.py` 中所有的变量都被载入了当前环境中,不过要使用 + + ex1.变量名 + +的方法来查看或者修改这些变量: + + 
+```python +print ex1.PI +``` + + 3.1416 + + + +```python +ex1.PI = 3.141592653 +print ex1.PI +``` + + 3.141592653 + + +还可以用 + + ex1.函数名 + +调用模块里面的函数: + + +```python +print ex1.sum([2, 3, 4]) +``` + + 9 + + +为了提高效率,**Python**只会载入模块一次,已经载入的模块再次载入时,Python并不会真正执行载入操作,哪怕模块的内容已经改变。 + +例如,这里重新导入 `ex1` 时,并不会执行 `ex1.py` 中的 `print` 语句: + + +```python +import ex1 +``` + +需要重新导入模块时,可以使用`reload`强制重新载入它,例如: + + +```python +reload(ex1) +``` + + 6 3.1416 + + + + + + + + + +删除之前生成的文件: + + +```python +import os +os.remove('ex1.py') +``` + +## `__name__` 属性 + +有时候我们想将一个 `.py` 文件既当作脚本,又能当作模块用,这个时候可以使用 `__name__` 这个属性。 + +只有当文件被当作脚本执行的时候, `__name__`的值才会是 `'__main__'`,所以我们可以: + + +```python +%%writefile ex2.py + +PI = 3.1416 + +def sum(lst): + """ Sum the values in a list + """ + tot = 0 + for value in lst: + tot = tot + value + return tot + +def add(x, y): + " Add two values." + a = x + y + return a + +def test(): + w = [0,1,2,3] + assert(sum(w) == 6) + print 'test passed.' + +if __name__ == '__main__': + test() +``` + + Writing ex2.py + + +运行文件: + + +```python +%run ex2.py +``` + + test passed. 
+ + +当作模块导入, `test()` 不会执行: + + +```python +import ex2 +``` + +但是可以使用其中的变量: + + +```python +ex2.PI +``` + + + + + 3.1416 + + + +使用别名: + + +```python +import ex2 as e2 +e2.PI +``` + + + + + 3.1416 + + + +## 其他导入方法 + +可以从模块中导入变量: + + +```python +from ex2 import add, PI +``` + +使用 `from` 后,可以直接使用 `add` , `PI`: + + +```python +add(2, 3) +``` + + + + + 5 + + + +或者使用 `*` 导入所有变量: + + +```python +from ex2 import * +add(3, 4.5) +``` + + + + + 7.5 + + + +这种导入方法不是很提倡,因为如果你不确定导入的都有哪些,可能覆盖一些已有的函数。 + +删除文件: + + +```python +import os +os.remove('ex2.py') +``` + +## 包 + +假设我们有这样的一个文件夹: + +foo/ +- `__init__.py` +- `bar.py` (defines func) +- `baz.py` (defines zap) + +这意味着 foo 是一个包,我们可以这样导入其中的内容: + +```python +from foo.bar import func +from foo.baz import zap +``` + +`bar` 和 `baz` 都是 `foo` 文件夹下的 `.py` 文件。 + +导入包要求: +- 文件夹 `foo` 在**Python**的搜索路径中 +- `__init__.py` 表示 `foo` 是一个包,它可以是个空文件。 + +## 常用的标准库 + +- re 正则表达式 +- copy 复制 +- math, cmath 数学 +- decimal, fraction +- sqlite3 数据库 +- os, os.path 文件系统 +- gzip, bz2, zipfile, tarfile 压缩文件 +- csv, netrc 各种文件格式 +- xml +- htmllib +- ftplib, socket +- cmd 命令行 +- pdb +- profile, cProfile, timeit +- collections, heapq, bisect 数据结构 +- mmap +- threading, Queue 并行 +- multiprocessing +- subprocess +- pickle, cPickle +- struct + +## PYTHONPATH设置 + +Python的搜索路径可以通过环境变量PYTHONPATH设置,环境变量的设置方法依操作系统的不同而不同,具体方法可以网上搜索。 diff --git a/docs/02-python-essentials/02.19-exceptions.md b/docs/02-python-essentials/02.19-exceptions.md new file mode 100644 index 00000000..137c9fb2 --- /dev/null +++ b/docs/02-python-essentials/02.19-exceptions.md @@ -0,0 +1,476 @@ + +# 异常 + +## try & except 块 + +写代码的时候,出现错误必不可免,即使代码没有问题,也可能遇到别的问题。 + +看下面这段代码: + +```python +import math + +while True: + text = raw_input('> ') + if text[0] == 'q': + break + x = float(text) + y = math.log10(x) + print "log10({0}) = {1}".format(x, y) +``` + +这段代码接收命令行的输入,当输入为数字时,计算它的对数并输出,直到输入值为 `q` 为止。 + +乍看没什么问题,然而当我们输入0或者负数时: + + +```python +import math + +while True: + text = raw_input('> ') + if text[0] 
== 'q': + break + x = float(text) + y = math.log10(x) + print "log10({0}) = {1}".format(x, y) +``` + + > -1 + + + + --------------------------------------------------------------------------- + + ValueError Traceback (most recent call last) + + in () + 6 break + 7 x = float(text) + ----> 8 y = math.log10(x) + 9 print "log10({0}) = {1}".format(x, y) + + + ValueError: math domain error + + +`log10` 函数会报错,因为不能接受非正值。 + +一旦报错,程序就会停止执行,如果不希望程序停止执行,那么我们可以添加一对 `try & except`: + +```python +import math + +while True: + try: + text = raw_input('> ') + if text[0] == 'q': + break + x = float(text) + y = math.log10(x) + print "log10({0}) = {1}".format(x, y) + except ValueError: + print "the value must be greater than 0" +``` + +一旦 `try` 块中的内容出现了异常,那么 `try` 块后面的内容会被忽略,**Python**会寻找 `except` 里面有没有对应的内容,如果找到,就执行对应的块,没有则抛出这个异常。 + +在上面的例子中,`try` 抛出的是 `ValueError`,`except` 中有对应的内容,所以这个异常被 `except` 捕捉到,程序可以继续执行: + + +```python +import math + +while True: + try: + text = raw_input('> ') + if text[0] == 'q': + break + x = float(text) + y = math.log10(x) + print "log10({0}) = {1}".format(x, y) + except ValueError: + print "the value must be greater than 0" +``` + + > -1 + the value must be greater than 0 + > 0 + the value must be greater than 0 + > 1 + log10(1.0) = 0.0 + > q + + +## 捕捉不同的错误类型 + +``` python +import math + +while True: + try: + text = raw_input('> ') + if text[0] == 'q': + break + x = float(text) + y = 1 / math.log10(x) + print "log10({0}) = {1}".format(x, y) + except ValueError: + print "the value must be greater than 0" +``` + +假设我们将这里的 `y` 更改为 `1 / math.log10(x)`,此时输入 `1`: + + +```python +import math + +while True: + try: + text = raw_input('> ') + if text[0] == 'q': + break + x = float(text) + y = 1 / math.log10(x) + print "log10({0}) = {1}".format(x, y) + except ValueError: + print "the value must be greater than 0" +``` + + > 1 + + + + --------------------------------------------------------------------------- + + ZeroDivisionError Traceback (most recent call last) + 
+ in () + 7 break + 8 x = float(text) + ----> 9 y = 1 / math.log10(x) + 10 print "log10({0}) = {1}".format(x, y) + 11 except ValueError: + + + ZeroDivisionError: float division by zero + + +因为我们的 `except` 里面并没有 `ZeroDivisionError`,所以会抛出这个异常,我们可以通过两种方式解决这个问题: + +## 捕捉所有异常 + +将`except` 的值改成 `Exception` 类,来捕获所有的异常。 + + +```python +import math + +while True: + try: + text = raw_input('> ') + if text[0] == 'q': + break + x = float(text) + y = 1 / math.log10(x) + print "1 / log10({0}) = {1}".format(x, y) + except Exception: + print "invalid value" +``` + + > 1 + invalid value + > 0 + invalid value + > -1 + invalid value + > 2 + 1 / log10(2.0) = 3.32192809489 + > q + + +## 指定特定值 + +这里,我们把 `ZeroDivisionError` 加入 `except` 。 + + +```python +import math + +while True: + try: + text = raw_input('> ') + if text[0] == 'q': + break + x = float(text) + y = 1 / math.log10(x) + print "1 / log10({0}) = {1}".format(x, y) + except (ValueError, ZeroDivisionError): + print "invalid value" +``` + + > 1 + invalid value + > -1 + invalid value + > 0 + invalid value + > q + + +或者另加处理: + + +```python +import math + +while True: + try: + text = raw_input('> ') + if text[0] == 'q': + break + x = float(text) + y = 1 / math.log10(x) + print "1 / log10({0}) = {1}".format(x, y) + except ValueError: + print "the value must be greater than 0" + except ZeroDivisionError: + print "the value must not be 1" +``` + + > 1 + the value must not be 1 + > -1 + the value must be greater than 0 + > 0 + the value must be greater than 0 + > 2 + 1 / log10(2.0) = 3.32192809489 + > q + + +事实上,我们还可以将这两种方式结合起来,用 `Exception` 来捕捉其他的错误: + + +```python +import math + +while True: + try: + text = raw_input('> ') + if text[0] == 'q': + break + x = float(text) + y = 1 / math.log10(x) + print "1 / log10({0}) = {1}".format(x, y) + except ValueError: + print "the value must be greater than 0" + except ZeroDivisionError: + print "the value must not be 1" + except Exception: + print "unexpected error" +``` + + > 1 + the value must 
not be 1 + > -1 + the value must be greater than 0 + > 0 + the value must be greater than 0 + > q + + +## 得到异常的具体信息 + +在上面的例子中,当我们输入不能转换为浮点数的字符串时,它输出的是 `the value must be greater than 0`,这并没有反映出实际情况。 + + +```python +float('a') +``` + + + --------------------------------------------------------------------------- + + ValueError Traceback (most recent call last) + + in () + ----> 1 float('a') + + + ValueError: could not convert string to float: a + + +为了得到异常的具体信息,我们将这个 `ValueError` 具现化: + + +```python +import math + +while True: + try: + text = raw_input('> ') + if text[0] == 'q': + break + x = float(text) + y = 1 / math.log10(x) + print "1 / log10({0}) = {1}".format(x, y) + except ValueError as exc: + if exc.message == "math domain error": + print "the value must be greater than 0" + else: + print "could not convert '%s' to float" % text + except ZeroDivisionError: + print "the value must not be 1" + except Exception as exc: + print "unexpected error:", exc.message +``` + + > 1 + the value must not be 1 + > -1 + the value must be greater than 0 + > aa + could not convert 'aa' to float + > q + + +同时,我们也将捕获的其他异常的信息显示出来。 + +这里,`exc.message` 显示的内容是异常对应的说明,例如 + + ValueError: could not convert string to float: a + +对应的 `message` 是 + + could not convert string to float: a + +当我们使用 `except Exception` 时,会捕获所有的 `Exception` 和它派生出来的子类,但不是所有的异常都是从 `Exception` 类派生出来的,可能会出现一些不能捕获的情况,因此,更加一般的做法是使用这样的形式: + +```python +try: + pass +except: + pass +``` + +这样不指定异常的类型会捕获所有的异常,但是这样的形式并不推荐。 + +## 自定义异常 + +异常是标准库中的类,这意味着我们可以自定义异常类: + + +```python +class CommandError(ValueError): + pass +``` + +这里我们定义了一个继承自 `ValueError` 的异常类,异常类一般接收一个字符串作为输入,并把这个字符串当作异常信息,例如: + + +```python +valid_commands = {'start', 'stop', 'pause'} + +while True: + command = raw_input('> ') + if command.lower() not in valid_commands: + raise CommandError('Invalid commmand: %s' % command) +``` + + > bad command + + + + --------------------------------------------------------------------------- + + CommandError Traceback 
(most recent call last) + + in () + 4 command = raw_input('> ') + 5 if command.lower() not in valid_commands: + ----> 6 raise CommandError('Invalid commmand: %s' % command) + + + CommandError: Invalid commmand: bad command + + +我们使用 `raise` 关键词来抛出异常。 + +我们可以使用 `try/except` 块来捕捉这个异常: + +``` python +valid_commands = {'start', 'stop', 'pause'} + +while True: + command = raw_input('> ') + try: + if command.lower() not in valid_commands: + raise CommandError('Invalid commmand: %s' % command) + except CommandError: + print 'Bad command string: "%s"' % command +``` + +由于 `CommandError` 继承自 `ValueError`,我们也可以使用 `except ValueError` 来捕获这个异常。 + +## finally + +`try/except` 块还有一个可选的关键词 `finally`。 + +不管 try 块有没有异常, finally 块的内容总是会被执行,而且会在异常继续向外抛出之前执行,因此可以用来作为安全保证,比如确保打开的文件被关闭。 + + +```python +try: + print 1 +finally: + print 'finally was called.' +``` + + 1 + finally was called. + + +在抛出异常前执行: + + +```python +try: + print 1 / 0 +finally: + print 'finally was called.' +``` + + finally was called. + + + + --------------------------------------------------------------------------- + + ZeroDivisionError Traceback (most recent call last) + + in () + 1 try: + ----> 2 print 1 / 0 + 3 finally: + 4 print 'finally was called.' + + + ZeroDivisionError: integer division or modulo by zero + + +如果异常被捕获了,在最后执行: + + +```python +try: + print 1 / 0 +except ZeroDivisionError: + print 'divide by 0.' +finally: + print 'finally was called.' +``` + + divide by 0. + finally was called.
+ diff --git a/docs/02-python-essentials/02.20-warnings.md b/docs/02-python-essentials/02.20-warnings.md new file mode 100644 index 00000000..08553163 --- /dev/null +++ b/docs/02-python-essentials/02.20-warnings.md @@ -0,0 +1,41 @@ + +# 警告 + +出现了一些需要让用户知道的问题,但又不想停止程序,这时候我们可以使用警告: + +首先导入警告模块: + + +```python +import warnings +``` + +在需要的地方,我们使用 `warnings` 中的 `warn` 函数: + + warn(msg, WarningType = UserWarning) + + +```python +def month_warning(m): + if not 1<= m <= 12: + msg = "month (%d) is not between 1 and 12" % m + warnings.warn(msg, RuntimeWarning) + +month_warning(13) +``` + + c:\Anaconda\lib\site-packages\IPython\kernel\__main__.py:4: RuntimeWarning: month (13) is not between 1 and 12 + + +有时候我们想要忽略特定类型的警告,可以使用 `warnings` 的 `filterwarnings` 函数: + + filterwarnings(action, category) + +将 `action` 设置为 `'ignore'` 便可以忽略特定类型的警告: + + +```python +warnings.filterwarnings(action = 'ignore', category = RuntimeWarning) + +month_warning(13) +``` diff --git a/docs/02-python-essentials/02.21-file-IO.md b/docs/02-python-essentials/02.21-file-IO.md new file mode 100644 index 00000000..6ed1678c --- /dev/null +++ b/docs/02-python-essentials/02.21-file-IO.md @@ -0,0 +1,6119 @@ + +# 文件读写 + +写入测试文件: + + +```python +%%writefile test.txt +this is a test file. +hello world! +python is good! +today is a good day. +``` + + Writing test.txt + + +## 读文件 + +使用 `open` 函数或者 `file` 函数来读文件,使用文件名的字符串作为输入参数: + + +```python +f = open('test.txt') +``` + + +```python +f = file('test.txt') +``` + +这两种方式没有太大区别。 + +默认以读的方式打开文件,如果文件不存在会报错。 + +可以使用 `read` 方法来读入文件中的所有内容: + + +```python +text = f.read() +print text +``` + + this is a test file. + hello world! + python is good! + today is a good day. 
+ + +也可以按照行读入内容,`readlines` 方法返回一个列表,每个元素代表文件中每一行的内容: + + +```python +f = open('test.txt') +lines = f.readlines() +print lines +``` + + ['this is a test file.\n', 'hello world!\n', 'python is good!\n', 'today is a good day.'] + + +使用完文件之后,需要将文件关闭。 + + +```python +f.close() +``` + +事实上,我们可以将 `f` 放在一个循环中,得到它每一行的内容: + + +```python +f = open('test.txt') +for line in f: + print line +f.close() +``` + + this is a test file. + + hello world! + + python is good! + + today is a good day. + + +删除刚才创建的文件: + + +```python +import os +os.remove('test.txt') +``` + +## 写文件 + +我们使用 `open` 函数的写入模式来写文件: + + +```python +f = open('myfile.txt', 'w') +f.write('hello world!') +f.close() +``` + +使用 `w` 模式时,如果文件不存在会被创建,我们可以查看是否真的写入成功: + + +```python +print open('myfile.txt').read() +``` + + hello world! + + +如果文件已经存在, `w` 模式会覆盖之前写的所有内容: + + +```python +f = open('myfile.txt', 'w') +f.write('another hello world!') +f.close() +print open('myfile.txt').read() +``` + + another hello world! + + +除了写入模式,还有追加模式 `a` ,追加模式不会覆盖之前已经写入的内容,而是在之后继续写入: + + +```python +f = open('myfile.txt', 'a') +f.write('... and more') +f.close() +print open('myfile.txt').read() +``` + + another hello world!... and more + + +写入结束之后一定要将文件关闭,否则可能出现内容没有完全写入文件中的情况。 + +还可以使用读写模式 `w+`: + + +```python +f = open('myfile.txt', 'w+') +f.write('hello world!') +f.seek(6) +print f.read() +f.close() +``` + + world! 
+ + +这里 `f.seek(6)` 移动到文件的第6个字符处,然后 `f.read()` 读出剩下的内容。 + + +```python +import os +os.remove('myfile.txt') +``` + +## 二进制文件 + +二进制读写模式 b: + + +```python +import os +f = open('binary.bin', 'wb') +f.write(os.urandom(16)) +f.close() + +f = open('binary.bin', 'rb') +print repr(f.read()) +f.close() +``` + + '\x86H\x93\xe1\xd8\xef\xc0\xaa(\x17\xa9\xc9\xa51\xf1\x98' + + + +```python +import os +os.remove('binary.bin') +``` + +## 换行符 + +不同操作系统的换行符可能不同: + +- `\r` +- `\n` +- `\r\n` + +使用 `U` 选项,可以将这三个统一看成 `\n` 换行符。 + +## 关闭文件 + +在**Python**中,如果一个打开的文件不再被其他变量引用时,它会自动关闭这个文件。 + +所以正常情况下,如果一个文件正常被关闭了,忘记调用文件的 `close` 方法不会有什么问题。 + +关闭文件可以保证内容已经被写入文件,而不关闭可能会出现意想不到的结果: + + +```python +f = open('newfile.txt','w') +f.write('hello world') +g = open('newfile.txt', 'r') +print repr(g.read()) +``` + + '' + + +虽然这里写了内容,但是在关闭之前,这个内容并没有被写入磁盘。 + +使用循环写入的内容也并不完整: + + +```python +f = open('newfile.txt','w') +for i in range(3000): + f.write('hello world: ' + str(i) + '\n') + +g = open('newfile.txt', 'r') +print g.read() +f.close() +g.close() +``` + + hello world: 0 + hello world: 1 + hello world: 2 + hello world: 3 + hello world: 4 + hello world: 5 + hello world: 6 + hello world: 7 + hello world: 8 + hello world: 9 + hello world: 10 + hello world: 11 + hello world: 12 + hello world: 13 + hello world: 14 + hello world: 15 + hello world: 16 + hello world: 17 + hello world: 18 + hello world: 19 + hello world: 20 + hello world: 21 + hello world: 22 + hello world: 23 + hello world: 24 + hello world: 25 + hello world: 26 + hello world: 27 + hello world: 28 + hello world: 29 + hello world: 30 + hello world: 31 + hello world: 32 + hello world: 33 + hello world: 34 + hello world: 35 + hello world: 36 + hello world: 37 + hello world: 38 + hello world: 39 + hello world: 40 + hello world: 41 + hello world: 42 + hello world: 43 + hello world: 44 + hello world: 45 + hello world: 46 + hello world: 47 + hello world: 48 + hello world: 49 + hello world: 50 + hello world: 51 + hello world: 52 + hello world: 53 + 
hello world: 54 + hello world: 55 + hello world: 56 + hello world: 57 + hello world: 58 + hello world: 59 + hello world: 60 + hello world: 61 + hello world: 62 + hello world: 63 + hello world: 64 + hello world: 65 + hello world: 66 + hello world: 67 + hello world: 68 + hello world: 69 + hello world: 70 + hello world: 71 + hello world: 72 + hello world: 73 + hello world: 74 + hello world: 75 + hello world: 76 + hello world: 77 + hello world: 78 + hello world: 79 + hello world: 80 + hello world: 81 + hello world: 82 + hello world: 83 + hello world: 84 + hello world: 85 + hello world: 86 + hello world: 87 + hello world: 88 + hello world: 89 + hello world: 90 + hello world: 91 + hello world: 92 + hello world: 93 + hello world: 94 + hello world: 95 + hello world: 96 + hello world: 97 + hello world: 98 + hello world: 99 + hello world: 100 + hello world: 101 + hello world: 102 + hello world: 103 + hello world: 104 + hello world: 105 + hello world: 106 + hello world: 107 + hello world: 108 + hello world: 109 + hello world: 110 + hello world: 111 + hello world: 112 + hello world: 113 + hello world: 114 + hello world: 115 + hello world: 116 + hello world: 117 + hello world: 118 + hello world: 119 + hello world: 120 + hello world: 121 + hello world: 122 + hello world: 123 + hello world: 124 + hello world: 125 + hello world: 126 + hello world: 127 + hello world: 128 + hello world: 129 + hello world: 130 + hello world: 131 + hello world: 132 + hello world: 133 + hello world: 134 + hello world: 135 + hello world: 136 + hello world: 137 + hello world: 138 + hello world: 139 + hello world: 140 + hello world: 141 + hello world: 142 + hello world: 143 + hello world: 144 + hello world: 145 + hello world: 146 + hello world: 147 + hello world: 148 + hello world: 149 + hello world: 150 + hello world: 151 + hello world: 152 + hello world: 153 + hello world: 154 + hello world: 155 + hello world: 156 + hello world: 157 + hello world: 158 + hello world: 159 + hello world: 160 + hello world: 
161 + hello world: 162 + hello world: 163 + hello world: 164 + hello world: 165 + hello world: 166 + hello world: 167 + hello world: 168 + hello world: 169 + hello world: 170 + hello world: 171 + hello world: 172 + hello world: 173 + hello world: 174 + hello world: 175 + hello world: 176 + hello world: 177 + hello world: 178 + hello world: 179 + hello world: 180 + hello world: 181 + hello world: 182 + hello world: 183 + hello world: 184 + hello world: 185 + hello world: 186 + hello world: 187 + hello world: 188 + hello world: 189 + hello world: 190 + hello world: 191 + hello world: 192 + hello world: 193 + hello world: 194 + hello world: 195 + hello world: 196 + hello world: 197 + hello world: 198 + hello world: 199 + hello world: 200 + hello world: 201 + hello world: 202 + hello world: 203 + hello world: 204 + hello world: 205 + hello world: 206 + hello world: 207 + hello world: 208 + hello world: 209 + hello world: 210 + hello world: 211 + hello world: 212 + hello world: 213 + hello world: 214 + hello world: 215 + hello world: 216 + hello world: 217 + hello world: 218 + hello world: 219 + hello world: 220 + hello world: 221 + hello world: 222 + hello world: 223 + hello world: 224 + hello world: 225 + hello world: 226 + hello world: 227 + hello world: 228 + hello world: 229 + hello world: 230 + hello world: 231 + hello world: 232 + hello world: 233 + hello world: 234 + hello world: 235 + hello world: 236 + hello world: 237 + hello world: 238 + hello world: 239 + hello world: 240 + hello world: 241 + hello world: 242 + hello world: 243 + hello world: 244 + hello world: 245 + hello world: 246 + hello world: 247 + hello world: 248 + hello world: 249 + hello world: 250 + hello world: 251 + hello world: 252 + hello world: 253 + hello world: 254 + hello world: 255 + hello world: 256 + hello world: 257 + hello world: 258 + hello world: 259 + hello world: 260 + hello world: 261 + hello world: 262 + hello world: 263 + hello world: 264 + hello world: 265 + hello world: 266 
+ hello world: 267 + hello world: 268 + hello world: 269 + hello world: 270 + hello world: 271 + hello world: 272 + hello world: 273 + hello world: 274 + hello world: 275 + hello world: 276 + hello world: 277 + hello world: 278 + hello world: 279 + hello world: 280 + hello world: 281 + hello world: 282 + hello world: 283 + hello world: 284 + hello world: 285 + hello world: 286 + hello world: 287 + hello world: 288 + hello world: 289 + hello world: 290 + hello world: 291 + hello world: 292 + hello world: 293 + hello world: 294 + hello world: 295 + hello world: 296 + hello world: 297 + hello world: 298 + hello world: 299 + hello world: 300 + hello world: 301 + hello world: 302 + hello world: 303 + hello world: 304 + hello world: 305 + hello world: 306 + hello world: 307 + hello world: 308 + hello world: 309 + hello world: 310 + hello world: 311 + hello world: 312 + hello world: 313 + hello world: 314 + hello world: 315 + hello world: 316 + hello world: 317 + hello world: 318 + hello world: 319 + hello world: 320 + hello world: 321 + hello world: 322 + hello world: 323 + hello world: 324 + hello world: 325 + hello world: 326 + hello world: 327 + hello world: 328 + hello world: 329 + hello world: 330 + hello world: 331 + hello world: 332 + hello world: 333 + hello world: 334 + hello world: 335 + hello world: 336 + hello world: 337 + hello world: 338 + hello world: 339 + hello world: 340 + hello world: 341 + hello world: 342 + hello world: 343 + hello world: 344 + hello world: 345 + hello world: 346 + hello world: 347 + hello world: 348 + hello world: 349 + hello world: 350 + hello world: 351 + hello world: 352 + hello world: 353 + hello world: 354 + hello world: 355 + hello world: 356 + hello world: 357 + hello world: 358 + hello world: 359 + hello world: 360 + hello world: 361 + hello world: 362 + hello world: 363 + hello world: 364 + hello world: 365 + hello world: 366 + hello world: 367 + hello world: 368 + hello world: 369 + hello world: 370 + hello world: 371 + 
hello world: 372 + hello world: 373 + hello world: 374 + hello world: 375 + hello world: 376 + hello world: 377 + hello world: 378 + hello world: 379 + hello world: 380 + hello world: 381 + hello world: 382 + hello world: 383 + hello world: 384 + hello world: 385 + hello world: 386 + hello world: 387 + hello world: 388 + hello world: 389 + hello world: 390 + hello world: 391 + hello world: 392 + hello world: 393 + hello world: 394 + hello world: 395 + hello world: 396 + hello world: 397 + hello world: 398 + hello world: 399 + hello world: 400 + hello world: 401 + hello world: 402 + hello world: 403 + hello world: 404 + hello world: 405 + hello world: 406 + hello world: 407 + hello world: 408 + hello world: 409 + hello world: 410 + hello world: 411 + hello world: 412 + hello world: 413 + hello world: 414 + hello world: 415 + hello world: 416 + hello world: 417 + hello world: 418 + hello world: 419 + hello world: 420 + hello world: 421 + hello world: 422 + hello world: 423 + hello world: 424 + hello world: 425 + hello world: 426 + hello world: 427 + hello world: 428 + hello world: 429 + hello world: 430 + hello world: 431 + hello world: 432 + hello world: 433 + hello world: 434 + hello world: 435 + hello world: 436 + hello world: 437 + hello world: 438 + hello world: 439 + hello world: 440 + hello world: 441 + hello world: 442 + hello world: 443 + hello world: 444 + hello world: 445 + hello world: 446 + hello world: 447 + hello world: 448 + hello world: 449 + hello world: 450 + hello world: 451 + hello world: 452 + hello world: 453 + hello world: 454 + hello world: 455 + hello world: 456 + hello world: 457 + hello world: 458 + hello world: 459 + hello world: 460 + hello world: 461 + hello world: 462 + hello world: 463 + hello world: 464 + hello world: 465 + hello world: 466 + hello world: 467 + hello world: 468 + hello world: 469 + hello world: 470 + hello world: 471 + hello world: 472 + hello world: 473 + hello world: 474 + hello world: 475 + hello world: 476 + 
hello world: 477 + hello world: 478 + hello world: 479 + hello world: 480 + hello world: 481 + hello world: 482 + hello world: 483 + hello world: 484 + hello world: 485 + hello world: 486 + hello world: 487 + hello world: 488 + hello world: 489 + hello world: 490 + hello world: 491 + hello world: 492 + hello world: 493 + hello world: 494 + hello world: 495 + hello world: 496 + hello world: 497 + hello world: 498 + hello world: 499 + hello world: 500 + hello world: 501 + hello world: 502 + hello world: 503 + hello world: 504 + hello world: 505 + hello world: 506 + hello world: 507 + hello world: 508 + hello world: 509 + hello world: 510 + hello world: 511 + hello world: 512 + hello world: 513 + hello world: 514 + hello world: 515 + hello world: 516 + hello world: 517 + hello world: 518 + hello world: 519 + hello world: 520 + hello world: 521 + hello world: 522 + hello world: 523 + hello world: 524 + hello world: 525 + hello world: 526 + hello world: 527 + hello world: 528 + hello world: 529 + hello world: 530 + hello world: 531 + hello world: 532 + hello world: 533 + hello world: 534 + hello world: 535 + hello world: 536 + hello world: 537 + hello world: 538 + hello world: 539 + hello world: 540 + hello world: 541 + hello world: 542 + hello world: 543 + hello world: 544 + hello world: 545 + hello world: 546 + hello world: 547 + hello world: 548 + hello world: 549 + hello world: 550 + hello world: 551 + hello world: 552 + hello world: 553 + hello world: 554 + hello world: 555 + hello world: 556 + hello world: 557 + hello world: 558 + hello world: 559 + hello world: 560 + hello world: 561 + hello world: 562 + hello world: 563 + hello world: 564 + hello world: 565 + hello world: 566 + hello world: 567 + hello world: 568 + hello world: 569 + hello world: 570 + hello world: 571 + hello world: 572 + hello world: 573 + hello world: 574 + hello world: 575 + hello world: 576 + hello world: 577 + hello world: 578 + hello world: 579 + hello world: 580 + hello world: 581 + 
hello world: 582 + hello world: 583 + hello world: 584 + hello world: 585 + hello world: 586 + hello world: 587 + hello world: 588 + hello world: 589 + hello world: 590 + hello world: 591 + hello world: 592 + hello world: 593 + hello world: 594 + hello world: 595 + hello world: 596 + hello world: 597 + hello world: 598 + hello world: 599 + hello world: 600 + hello world: 601 + hello world: 602 + hello world: 603 + hello world: 604 + hello world: 605 + hello world: 606 + hello world: 607 + hello world: 608 + hello world: 609 + hello world: 610 + hello world: 611 + hello world: 612 + hello world: 613 + hello world: 614 + hello world: 615 + hello world: 616 + hello world: 617 + hello world: 618 + hello world: 619 + hello world: 620 + hello world: 621 + hello world: 622 + hello world: 623 + hello world: 624 + hello world: 625 + hello world: 626 + hello world: 627 + hello world: 628 + hello world: 629 + hello world: 630 + hello world: 631 + hello world: 632 + hello world: 633 + hello world: 634 + hello world: 635 + hello world: 636 + hello world: 637 + hello world: 638 + hello world: 639 + hello world: 640 + hello world: 641 + hello world: 642 + hello world: 643 + hello world: 644 + hello world: 645 + hello world: 646 + hello world: 647 + hello world: 648 + hello world: 649 + hello world: 650 + hello world: 651 + hello world: 652 + hello world: 653 + hello world: 654 + hello world: 655 + hello world: 656 + hello world: 657 + hello world: 658 + hello world: 659 + hello world: 660 + hello world: 661 + hello world: 662 + hello world: 663 + hello world: 664 + hello world: 665 + hello world: 666 + hello world: 667 + hello world: 668 + hello world: 669 + hello world: 670 + hello world: 671 + hello world: 672 + hello world: 673 + hello world: 674 + hello world: 675 + hello world: 676 + hello world: 677 + hello world: 678 + hello world: 679 + hello world: 680 + hello world: 681 + hello world: 682 + hello world: 683 + hello world: 684 + hello world: 685 + hello world: 686 + 
hello world: 687 + hello world: 688 + hello world: 689 + hello world: 690 + hello world: 691 + hello world: 692 + hello world: 693 + hello world: 694 + hello world: 695 + hello world: 696 + hello world: 697 + hello world: 698 + hello world: 699 + hello world: 700 + hello world: 701 + hello world: 702 + hello world: 703 + hello world: 704 + hello world: 705 + hello world: 706 + hello world: 707 + hello world: 708 + hello world: 709 + hello world: 710 + hello world: 711 + hello world: 712 + hello world: 713 + hello world: 714 + hello world: 715 + hello world: 716 + hello world: 717 + hello world: 718 + hello world: 719 + hello world: 720 + hello world: 721 + hello world: 722 + hello world: 723 + hello world: 724 + hello world: 725 + hello world: 726 + hello world: 727 + hello world: 728 + hello world: 729 + hello world: 730 + hello world: 731 + hello world: 732 + hello world: 733 + hello world: 734 + hello world: 735 + hello world: 736 + hello world: 737 + hello world: 738 + hello world: 739 + hello world: 740 + hello world: 741 + hello world: 742 + hello world: 743 + hello world: 744 + hello world: 745 + hello world: 746 + hello world: 747 + hello world: 748 + hello world: 749 + hello world: 750 + hello world: 751 + hello world: 752 + hello world: 753 + hello world: 754 + hello world: 755 + hello world: 756 + hello world: 757 + hello world: 758 + hello world: 759 + hello world: 760 + hello world: 761 + hello world: 762 + hello world: 763 + hello world: 764 + hello world: 765 + hello world: 766 + hello world: 767 + hello world: 768 + hello world: 769 + hello world: 770 + hello world: 771 + hello world: 772 + hello world: 773 + hello world: 774 + hello world: 775 + hello world: 776 + hello world: 777 + hello world: 778 + hello world: 779 + hello world: 780 + hello world: 781 + hello world: 782 + hello world: 783 + hello world: 784 + hello world: 785 + hello world: 786 + hello world: 787 + hello world: 788 + hello world: 789 + hello world: 790 + hello world: 791 + 
hello world: 792 + hello world: 793 + hello world: 794 + hello world: 795 + hello world: 796 + hello world: 797 + hello world: 798 + hello world: 799 + hello world: 800 + hello world: 801 + hello world: 802 + hello world: 803 + hello world: 804 + hello world: 805 + hello world: 806 + hello world: 807 + hello world: 808 + hello world: 809 + hello world: 810 + hello world: 811 + hello world: 812 + hello world: 813 + hello world: 814 + hello world: 815 + hello world: 816 + hello world: 817 + hello world: 818 + hello world: 819 + hello world: 820 + hello world: 821 + hello world: 822 + hello world: 823 + hello world: 824 + hello world: 825 + hello world: 826 + hello world: 827 + hello world: 828 + hello world: 829 + hello world: 830 + hello world: 831 + hello world: 832 + hello world: 833 + hello world: 834 + hello world: 835 + hello world: 836 + hello world: 837 + hello world: 838 + hello world: 839 + hello world: 840 + hello world: 841 + hello world: 842 + hello world: 843 + hello world: 844 + hello world: 845 + hello world: 846 + hello world: 847 + hello world: 848 + hello world: 849 + hello world: 850 + hello world: 851 + hello world: 852 + hello world: 853 + hello world: 854 + hello world: 855 + hello world: 856 + hello world: 857 + hello world: 858 + hello world: 859 + hello world: 860 + hello world: 861 + hello world: 862 + hello world: 863 + hello world: 864 + hello world: 865 + hello world: 866 + hello world: 867 + hello world: 868 + hello world: 869 + hello world: 870 + hello world: 871 + hello world: 872 + hello world: 873 + hello world: 874 + hello world: 875 + hello world: 876 + hello world: 877 + hello world: 878 + hello world: 879 + hello world: 880 + hello world: 881 + hello world: 882 + hello world: 883 + hello world: 884 + hello world: 885 + hello world: 886 + hello world: 887 + hello world: 888 + hello world: 889 + hello world: 890 + hello world: 891 + hello world: 892 + hello world: 893 + hello world: 894 + hello world: 895 + hello world: 896 + 
hello world: 897 + hello world: 898 + hello world: 899 + hello world: 900 + hello world: 901 + hello world: 902 + hello world: 903 + hello world: 904 + hello world: 905 + hello world: 906 + hello world: 907 + hello world: 908 + hello world: 909 + hello world: 910 + hello world: 911 + hello world: 912 + hello world: 913 + hello world: 914 + hello world: 915 + hello world: 916 + hello world: 917 + hello world: 918 + hello world: 919 + hello world: 920 + hello world: 921 + hello world: 922 + hello world: 923 + hello world: 924 + hello world: 925 + hello world: 926 + hello world: 927 + hello world: 928 + hello world: 929 + hello world: 930 + hello world: 931 + hello world: 932 + hello world: 933 + hello world: 934 + hello world: 935 + hello world: 936 + hello world: 937 + hello world: 938 + hello world: 939 + hello world: 940 + hello world: 941 + hello world: 942 + hello world: 943 + hello world: 944 + hello world: 945 + hello world: 946 + hello world: 947 + hello world: 948 + hello world: 949 + hello world: 950 + hello world: 951 + hello world: 952 + hello world: 953 + hello world: 954 + hello world: 955 + hello world: 956 + hello world: 957 + hello world: 958 + hello world: 959 + hello world: 960 + hello world: 961 + hello world: 962 + hello world: 963 + hello world: 964 + hello world: 965 + hello world: 966 + hello world: 967 + hello world: 968 + hello world: 969 + hello world: 970 + hello world: 971 + hello world: 972 + hello world: 973 + hello world: 974 + hello world: 975 + hello world: 976 + hello world: 977 + hello world: 978 + hello world: 979 + hello world: 980 + hello world: 981 + hello world: 982 + hello world: 983 + hello world: 984 + hello world: 985 + hello world: 986 + hello world: 987 + hello world: 988 + hello world: 989 + hello world: 990 + hello world: 991 + hello world: 992 + hello world: 993 + hello world: 994 + hello world: 995 + hello world: 996 + hello world: 997 + hello world: 998 + hello world: 999 + hello world: 1000 + hello world: 1001 + 
hello world: 1002 + hello world: 1003 + hello world: 1004 + hello world: 1005 + hello world: 1006 + hello world: 1007 + hello world: 1008 + hello world: 1009 + hello world: 1010 + hello world: 1011 + hello world: 1012 + hello world: 1013 + hello world: 1014 + hello world: 1015 + hello world: 1016 + hello world: 1017 + hello world: 1018 + hello world: 1019 + hello world: 1020 + hello world: 1021 + hello world: 1022 + hello world: 1023 + hello world: 1024 + hello world: 1025 + hello world: 1026 + hello world: 1027 + hello world: 1028 + hello world: 1029 + hello world: 1030 + hello world: 1031 + hello world: 1032 + hello world: 1033 + hello world: 1034 + hello world: 1035 + hello world: 1036 + hello world: 1037 + hello world: 1038 + hello world: 1039 + hello world: 1040 + hello world: 1041 + hello world: 1042 + hello world: 1043 + hello world: 1044 + hello world: 1045 + hello world: 1046 + hello world: 1047 + hello world: 1048 + hello world: 1049 + hello world: 1050 + hello world: 1051 + hello world: 1052 + hello world: 1053 + hello world: 1054 + hello world: 1055 + hello world: 1056 + hello world: 1057 + hello world: 1058 + hello world: 1059 + hello world: 1060 + hello world: 1061 + hello world: 1062 + hello world: 1063 + hello world: 1064 + hello world: 1065 + hello world: 1066 + hello world: 1067 + hello world: 1068 + hello world: 1069 + hello world: 1070 + hello world: 1071 + hello world: 1072 + hello world: 1073 + hello world: 1074 + hello world: 1075 + hello world: 1076 + hello world: 1077 + hello world: 1078 + hello world: 1079 + hello world: 1080 + hello world: 1081 + hello world: 1082 + hello world: 1083 + hello world: 1084 + hello world: 1085 + hello world: 1086 + hello world: 1087 + hello world: 1088 + hello world: 1089 + hello world: 1090 + hello world: 1091 + hello world: 1092 + hello world: 1093 + hello world: 1094 + hello world: 1095 + hello world: 1096 + hello world: 1097 + hello world: 1098 + hello world: 1099 + hello world: 1100 + hello world: 1101 + 
hello world: 1102 + hello world: 1103 + hello world: 1104 + hello world: 1105 + hello world: 1106 + hello world: 1107 + hello world: 1108 + hello world: 1109 + hello world: 1110 + hello world: 1111 + hello world: 1112 + hello world: 1113 + hello world: 1114 + hello world: 1115 + hello world: 1116 + hello world: 1117 + hello world: 1118 + hello world: 1119 + hello world: 1120 + hello world: 1121 + hello world: 1122 + hello world: 1123 + hello world: 1124 + hello world: 1125 + hello world: 1126 + hello world: 1127 + hello world: 1128 + hello world: 1129 + hello world: 1130 + hello world: 1131 + hello world: 1132 + hello world: 1133 + hello world: 1134 + hello world: 1135 + hello world: 1136 + hello world: 1137 + hello world: 1138 + hello world: 1139 + hello world: 1140 + hello world: 1141 + hello world: 1142 + hello world: 1143 + hello world: 1144 + hello world: 1145 + hello world: 1146 + hello world: 1147 + hello world: 1148 + hello world: 1149 + hello world: 1150 + hello world: 1151 + hello world: 1152 + hello world: 1153 + hello world: 1154 + hello world: 1155 + hello world: 1156 + hello world: 1157 + hello world: 1158 + hello world: 1159 + hello world: 1160 + hello world: 1161 + hello world: 1162 + hello world: 1163 + hello world: 1164 + hello world: 1165 + hello world: 1166 + hello world: 1167 + hello world: 1168 + hello world: 1169 + hello world: 1170 + hello world: 1171 + hello world: 1172 + hello world: 1173 + hello world: 1174 + hello world: 1175 + hello world: 1176 + hello world: 1177 + hello world: 1178 + hello world: 1179 + hello world: 1180 + hello world: 1181 + hello world: 1182 + hello world: 1183 + hello world: 1184 + hello world: 1185 + hello world: 1186 + hello world: 1187 + hello world: 1188 + hello world: 1189 + hello world: 1190 + hello world: 1191 + hello world: 1192 + hello world: 1193 + hello world: 1194 + hello world: 1195 + hello world: 1196 + hello world: 1197 + hello world: 1198 + hello world: 1199 + hello world: 1200 + hello world: 1201 + 
hello world: 1202 + hello world: 1203 + hello world: 1204 + hello world: 1205 + hello world: 1206 + hello world: 1207 + hello world: 1208 + hello world: 1209 + hello world: 1210 + hello world: 1211 + hello world: 1212 + hello world: 1213 + hello world: 1214 + hello world: 1215 + hello world: 1216 + hello world: 1217 + hello world: 1218 + hello world: 1219 + hello world: 1220 + hello world: 1221 + hello world: 1222 + hello world: 1223 + hello world: 1224 + hello world: 1225 + hello world: 1226 + hello world: 1227 + hello world: 1228 + hello world: 1229 + hello world: 1230 + hello world: 1231 + hello world: 1232 + hello world: 1233 + hello world: 1234 + hello world: 1235 + hello world: 1236 + hello world: 1237 + hello world: 1238 + hello world: 1239 + hello world: 1240 + hello world: 1241 + hello world: 1242 + hello world: 1243 + hello world: 1244 + hello world: 1245 + hello world: 1246 + hello world: 1247 + hello world: 1248 + hello world: 1249 + hello world: 1250 + hello world: 1251 + hello world: 1252 + hello world: 1253 + hello world: 1254 + hello world: 1255 + hello world: 1256 + hello world: 1257 + hello world: 1258 + hello world: 1259 + hello world: 1260 + hello world: 1261 + hello world: 1262 + hello world: 1263 + hello world: 1264 + hello world: 1265 + hello world: 1266 + hello world: 1267 + hello world: 1268 + hello world: 1269 + hello world: 1270 + hello world: 1271 + hello world: 1272 + hello world: 1273 + hello world: 1274 + hello world: 1275 + hello world: 1276 + hello world: 1277 + hello world: 1278 + hello world: 1279 + hello world: 1280 + hello world: 1281 + hello world: 1282 + hello world: 1283 + hello world: 1284 + hello world: 1285 + hello world: 1286 + hello world: 1287 + hello world: 1288 + hello world: 1289 + hello world: 1290 + hello world: 1291 + hello world: 1292 + hello world: 1293 + hello world: 1294 + hello world: 1295 + hello world: 1296 + hello world: 1297 + hello world: 1298 + hello world: 1299 + hello world: 1300 + hello world: 1301 + 
hello world: 1302 + hello world: 1303 + hello world: 1304 + hello world: 1305 + hello world: 1306 + hello world: 1307 + hello world: 1308 + hello world: 1309 + hello world: 1310 + hello world: 1311 + hello world: 1312 + hello world: 1313 + hello world: 1314 + hello world: 1315 + hello world: 1316 + hello world: 1317 + hello world: 1318 + hello world: 1319 + hello world: 1320 + hello world: 1321 + hello world: 1322 + hello world: 1323 + hello world: 1324 + hello world: 1325 + hello world: 1326 + hello world: 1327 + hello world: 1328 + hello world: 1329 + hello world: 1330 + hello world: 1331 + hello world: 1332 + hello world: 1333 + hello world: 1334 + hello world: 1335 + hello world: 1336 + hello world: 1337 + hello world: 1338 + hello world: 1339 + hello world: 1340 + hello world: 1341 + hello world: 1342 + hello world: 1343 + hello world: 1344 + hello world: 1345 + hello world: 1346 + hello world: 1347 + hello world: 1348 + hello world: 1349 + hello world: 1350 + hello world: 1351 + hello world: 1352 + hello world: 1353 + hello world: 1354 + hello world: 1355 + hello world: 1356 + hello world: 1357 + hello world: 1358 + hello world: 1359 + hello world: 1360 + hello world: 1361 + hello world: 1362 + hello world: 1363 + hello world: 1364 + hello world: 1365 + hello world: 1366 + hello world: 1367 + hello world: 1368 + hello world: 1369 + hello world: 1370 + hello world: 1371 + hello world: 1372 + hello world: 1373 + hello world: 1374 + hello world: 1375 + hello world: 1376 + hello world: 1377 + hello world: 1378 + hello world: 1379 + hello world: 1380 + hello world: 1381 + hello world: 1382 + hello world: 1383 + hello world: 1384 + hello world: 1385 + hello world: 1386 + hello world: 1387 + hello world: 1388 + hello world: 1389 + hello world: 1390 + hello world: 1391 + hello world: 1392 + hello world: 1393 + hello world: 1394 + hello world: 1395 + hello world: 1396 + hello world: 1397 + hello world: 1398 + hello world: 1399 + hello world: 1400 + hello world: 1401 + 
hello world: 1402 + hello world: 1403 + hello world: 1404 + hello world: 1405 + hello world: 1406 + hello world: 1407 + hello world: 1408 + hello world: 1409 + hello world: 1410 + hello world: 1411 + hello world: 1412 + hello world: 1413 + hello world: 1414 + hello world: 1415 + hello world: 1416 + hello world: 1417 + hello world: 1418 + hello world: 1419 + hello world: 1420 + hello world: 1421 + hello world: 1422 + hello world: 1423 + hello world: 1424 + hello world: 1425 + hello world: 1426 + hello world: 1427 + hello world: 1428 + hello world: 1429 + hello world: 1430 + hello world: 1431 + hello world: 1432 + hello world: 1433 + hello world: 1434 + hello world: 1435 + hello world: 1436 + hello world: 1437 + hello world: 1438 + hello world: 1439 + hello world: 1440 + hello world: 1441 + hello world: 1442 + hello world: 1443 + hello world: 1444 + hello world: 1445 + hello world: 1446 + hello world: 1447 + hello world: 1448 + hello world: 1449 + hello world: 1450 + hello world: 1451 + hello world: 1452 + hello world: 1453 + hello world: 1454 + hello world: 1455 + hello world: 1456 + hello world: 1457 + hello world: 1458 + hello world: 1459 + hello world: 1460 + hello world: 1461 + hello world: 1462 + hello world: 1463 + hello world: 1464 + hello world: 1465 + hello world: 1466 + hello world: 1467 + hello world: 1468 + hello world: 1469 + hello world: 1470 + hello world: 1471 + hello world: 1472 + hello world: 1473 + hello world: 1474 + hello world: 1475 + hello world: 1476 + hello world: 1477 + hello world: 1478 + hello world: 1479 + hello world: 1480 + hello world: 1481 + hello world: 1482 + hello world: 1483 + hello world: 1484 + hello world: 1485 + hello world: 1486 + hello world: 1487 + hello world: 1488 + hello world: 1489 + hello world: 1490 + hello world: 1491 + hello world: 1492 + hello world: 1493 + hello world: 1494 + hello world: 1495 + hello world: 1496 + hello world: 1497 + hello world: 1498 + hello world: 1499 + hello world: 1500 + hello world: 1501 + 
hello world: 1502 + hello world: 1503 + hello world: 1504 + hello world: 1505 + hello world: 1506 + hello world: 1507 + hello world: 1508 + hello world: 1509 + hello world: 1510 + hello world: 1511 + hello world: 1512 + hello world: 1513 + hello world: 1514 + hello world: 1515 + hello world: 1516 + hello world: 1517 + hello world: 1518 + hello world: 1519 + hello world: 1520 + hello world: 1521 + hello world: 1522 + hello world: 1523 + hello world: 1524 + hello world: 1525 + hello world: 1526 + hello world: 1527 + hello world: 1528 + hello world: 1529 + hello world: 1530 + hello world: 1531 + hello world: 1532 + hello world: 1533 + hello world: 1534 + hello world: 1535 + hello world: 1536 + hello world: 1537 + hello world: 1538 + hello world: 1539 + hello world: 1540 + hello world: 1541 + hello world: 1542 + hello world: 1543 + hello world: 1544 + hello world: 1545 + hello world: 1546 + hello world: 1547 + hello world: 1548 + hello world: 1549 + hello world: 1550 + hello world: 1551 + hello world: 1552 + hello world: 1553 + hello world: 1554 + hello world: 1555 + hello world: 1556 + hello world: 1557 + hello world: 1558 + hello world: 1559 + hello world: 1560 + hello world: 1561 + hello world: 1562 + hello world: 1563 + hello world: 1564 + hello world: 1565 + hello world: 1566 + hello world: 1567 + hello world: 1568 + hello world: 1569 + hello world: 1570 + hello world: 1571 + hello world: 1572 + hello world: 1573 + hello world: 1574 + hello world: 1575 + hello world: 1576 + hello world: 1577 + hello world: 1578 + hello world: 1579 + hello world: 1580 + hello world: 1581 + hello world: 1582 + hello world: 1583 + hello world: 1584 + hello world: 1585 + hello world: 1586 + hello world: 1587 + hello world: 1588 + hello world: 1589 + hello world: 1590 + hello world: 1591 + hello world: 1592 + hello world: 1593 + hello world: 1594 + hello world: 1595 + hello world: 1596 + hello world: 1597 + hello world: 1598 + hello world: 1599 + hello world: 1600 + hello world: 1601 + 
hello world: 1602 + hello world: 1603 + hello world: 1604 + hello world: 1605 + hello world: 1606 + hello world: 1607 + hello world: 1608 + hello world: 1609 + hello world: 1610 + hello world: 1611 + hello world: 1612 + hello world: 1613 + hello world: 1614 + hello world: 1615 + hello world: 1616 + hello world: 1617 + hello world: 1618 + hello world: 1619 + hello world: 1620 + hello world: 1621 + hello world: 1622 + hello world: 1623 + hello world: 1624 + hello world: 1625 + hello world: 1626 + hello world: 1627 + hello world: 1628 + hello world: 1629 + hello world: 1630 + hello world: 1631 + hello world: 1632 + hello world: 1633 + hello world: 1634 + hello world: 1635 + hello world: 1636 + hello world: 1637 + hello world: 1638 + hello world: 1639 + hello world: 1640 + hello world: 1641 + hello world: 1642 + hello world: 1643 + hello world: 1644 + hello world: 1645 + hello world: 1646 + hello world: 1647 + hello world: 1648 + hello world: 1649 + hello world: 1650 + hello world: 1651 + hello world: 1652 + hello world: 1653 + hello world: 1654 + hello world: 1655 + hello world: 1656 + hello world: 1657 + hello world: 1658 + hello world: 1659 + hello world: 1660 + hello world: 1661 + hello world: 1662 + hello world: 1663 + hello world: 1664 + hello world: 1665 + hello world: 1666 + hello world: 1667 + hello world: 1668 + hello world: 1669 + hello world: 1670 + hello world: 1671 + hello world: 1672 + hello world: 1673 + hello world: 1674 + hello world: 1675 + hello world: 1676 + hello world: 1677 + hello world: 1678 + hello world: 1679 + hello world: 1680 + hello world: 1681 + hello world: 1682 + hello world: 1683 + hello world: 1684 + hello world: 1685 + hello world: 1686 + hello world: 1687 + hello world: 1688 + hello world: 1689 + hello world: 1690 + hello world: 1691 + hello world: 1692 + hello world: 1693 + hello world: 1694 + hello world: 1695 + hello world: 1696 + hello world: 1697 + hello world: 1698 + hello world: 1699 + hello world: 1700 + hello world: 1701 + 
hello world: 1702 + hello world: 1703 + hello world: 1704 + hello world: 1705 + hello world: 1706 + hello world: 1707 + hello world: 1708 + hello world: 1709 + hello world: 1710 + hello world: 1711 + hello world: 1712 + hello world: 1713 + hello world: 1714 + hello world: 1715 + hello world: 1716 + hello world: 1717 + hello world: 1718 + hello world: 1719 + hello world: 1720 + hello world: 1721 + hello world: 1722 + hello world: 1723 + hello world: 1724 + hello world: 1725 + hello world: 1726 + hello world: 1727 + hello world: 1728 + hello world: 1729 + hello world: 1730 + hello world: 1731 + hello world: 1732 + hello world: 1733 + hello world: 1734 + hello world: 1735 + hello world: 1736 + hello world: 1737 + hello world: 1738 + hello world: 1739 + hello world: 1740 + hello world: 1741 + hello world: 1742 + hello world: 1743 + hello world: 1744 + hello world: 1745 + hello world: 1746 + hello world: 1747 + hello world: 1748 + hello world: 1749 + hello world: 1750 + hello world: 1751 + hello world: 1752 + hello world: 1753 + hello world: 1754 + hello world: 1755 + hello world: 1756 + hello world: 1757 + hello world: 1758 + hello world: 1759 + hello world: 1760 + hello world: 1761 + hello world: 1762 + hello world: 1763 + hello world: 1764 + hello world: 1765 + hello world: 1766 + hello world: 1767 + hello world: 1768 + hello world: 1769 + hello world: 1770 + hello world: 1771 + hello world: 1772 + hello world: 1773 + hello world: 1774 + hello world: 1775 + hello world: 1776 + hello world: 1777 + hello world: 1778 + hello world: 1779 + hello world: 1780 + hello world: 1781 + hello world: 1782 + hello world: 1783 + hello world: 1784 + hello world: 1785 + hello world: 1786 + hello world: 1787 + hello world: 1788 + hello world: 1789 + hello world: 1790 + hello world: 1791 + hello world: 1792 + hello world: 1793 + hello world: 1794 + hello world: 1795 + hello world: 1796 + hello world: 1797 + hello world: 1798 + hello world: 1799 + hello world: 1800 + hello world: 1801 + 
hello world: 1802 + hello world: 1803 + hello world: 1804 + hello world: 1805 + hello world: 1806 + hello world: 1807 + hello world: 1808 + hello world: 1809 + hello world: 1810 + hello world: 1811 + hello world: 1812 + hello world: 1813 + hello world: 1814 + hello world: 1815 + hello world: 1816 + hello world: 1817 + hello world: 1818 + hello world: 1819 + hello world: 1820 + hello world: 1821 + hello world: 1822 + hello world: 1823 + hello world: 1824 + hello world: 1825 + hello world: 1826 + hello world: 1827 + hello world: 1828 + hello world: 1829 + hello world: 1830 + hello world: 1831 + hello world: 1832 + hello world: 1833 + hello world: 1834 + hello world: 1835 + hello world: 1836 + hello world: 1837 + hello world: 1838 + hello world: 1839 + hello world: 1840 + hello world: 1841 + hello world: 1842 + hello world: 1843 + hello world: 1844 + hello world: 1845 + hello world: 1846 + hello world: 1847 + hello world: 1848 + hello world: 1849 + hello world: 1850 + hello world: 1851 + hello world: 1852 + hello world: 1853 + hello world: 1854 + hello world: 1855 + hello world: 1856 + hello world: 1857 + hello world: 1858 + hello world: 1859 + hello world: 1860 + hello world: 1861 + hello world: 1862 + hello world: 1863 + hello world: 1864 + hello world: 1865 + hello world: 1866 + hello world: 1867 + hello world: 1868 + hello world: 1869 + hello world: 1870 + hello world: 1871 + hello world: 1872 + hello world: 1873 + hello world: 1874 + hello world: 1875 + hello world: 1876 + hello world: 1877 + hello world: 1878 + hello world: 1879 + hello world: 1880 + hello world: 1881 + hello world: 1882 + hello world: 1883 + hello world: 1884 + hello world: 1885 + hello world: 1886 + hello world: 1887 + hello world: 1888 + hello world: 1889 + hello world: 1890 + hello world: 1891 + hello world: 1892 + hello world: 1893 + hello world: 1894 + hello world: 1895 + hello world: 1896 + hello world: 1897 + hello world: 1898 + hello world: 1899 + hello world: 1900 + hello world: 1901 + 
hello world: 1902 + hello world: 1903 + hello world: 1904 + hello world: 1905 + hello world: 1906 + hello world: 1907 + hello world: 1908 + hello world: 1909 + hello world: 1910 + hello world: 1911 + hello world: 1912 + hello world: 1913 + hello world: 1914 + hello world: 1915 + hello world: 1916 + hello world: 1917 + hello world: 1918 + hello world: 1919 + hello world: 1920 + hello world: 1921 + hello world: 1922 + hello world: 1923 + hello world: 1924 + hello world: 1925 + hello world: 1926 + hello world: 1927 + hello world: 1928 + hello world: 1929 + hello world: 1930 + hello world: 1931 + hello world: 1932 + hello world: 1933 + hello world: 1934 + hello world: 1935 + hello world: 1936 + hello world: 1937 + hello world: 1938 + hello world: 1939 + hello world: 1940 + hello world: 1941 + hello world: 1942 + hello world: 1943 + hello world: 1944 + hello world: 1945 + hello world: 1946 + hello world: 1947 + hello world: 1948 + hello world: 1949 + hello world: 1950 + hello world: 1951 + hello world: 1952 + hello world: 1953 + hello world: 1954 + hello world: 1955 + hello world: 1956 + hello world: 1957 + hello world: 1958 + hello world: 1959 + hello world: 1960 + hello world: 1961 + hello world: 1962 + hello world: 1963 + hello world: 1964 + hello world: 1965 + hello world: 1966 + hello world: 1967 + hello world: 1968 + hello world: 1969 + hello world: 1970 + hello world: 1971 + hello world: 1972 + hello world: 1973 + hello world: 1974 + hello world: 1975 + hello world: 1976 + hello world: 1977 + hello world: 1978 + hello world: 1979 + hello world: 1980 + hello world: 1981 + hello world: 1982 + hello world: 1983 + hello world: 1984 + hello world: 1985 + hello world: 1986 + hello world: 1987 + hello world: 1988 + hello world: 1989 + hello world: 1990 + hello world: 1991 + hello world: 1992 + hello world: 1993 + hello world: 1994 + hello world: 1995 + hello world: 1996 + hello world: 1997 + hello world: 1998 + hello world: 1999 + hello world: 2000 + hello world: 2001 + 
hello world: 2002 + hello world: 2003 + hello world: 2004 + hello world: 2005 + hello world: 2006 + hello world: 2007 + hello world: 2008 + hello world: 2009 + hello world: 2010 + hello world: 2011 + hello world: 2012 + hello world: 2013 + hello world: 2014 + hello world: 2015 + hello world: 2016 + hello world: 2017 + hello world: 2018 + hello world: 2019 + hello world: 2020 + hello world: 2021 + hello world: 2022 + hello world: 2023 + hello world: 2024 + hello world: 2025 + hello world: 2026 + hello world: 2027 + hello world: 2028 + hello world: 2029 + hello world: 2030 + hello world: 2031 + hello world: 2032 + hello world: 2033 + hello world: 2034 + hello world: 2035 + hello world: 2036 + hello world: 2037 + hello world: 2038 + hello world: 2039 + hello world: 2040 + hello world: 2041 + hello world: 2042 + hello world: 2043 + hello world: 2044 + hello world: 2045 + hello world: 2046 + hello world: 2047 + hello world: 2048 + hello world: 2049 + hello world: 2050 + hello world: 2051 + hello world: 2052 + hello world: 2053 + hello world: 2054 + hello world: 2055 + hello world: 2056 + hello world: 2057 + hello world: 2058 + hello world: 2059 + hello world: 2060 + hello world: 2061 + hello world: 2062 + hello world: 2063 + hello world: 2064 + hello world: 2065 + hello world: 2066 + hello world: 2067 + hello world: 2068 + hello world: 2069 + hello world: 2070 + hello world: 2071 + hello world: 2072 + hello world: 2073 + hello world: 2074 + hello world: 2075 + hello world: 2076 + hello world: 2077 + hello world: 2078 + hello world: 2079 + hello world: 2080 + hello world: 2081 + hello world: 2082 + hello world: 2083 + hello world: 2084 + hello world: 2085 + hello world: 2086 + hello world: 2087 + hello world: 2088 + hello world: 2089 + hello world: 2090 + hello world: 2091 + hello world: 2092 + hello world: 2093 + hello world: 2094 + hello world: 2095 + hello world: 2096 + hello world: 2097 + hello world: 2098 + hello world: 2099 + hello world: 2100 + hello world: 2101 + 
hello world: 2102 + hello world: 2103 + hello world: 2104 + hello world: 2105 + hello world: 2106 + hello world: 2107 + hello world: 2108 + hello world: 2109 + hello world: 2110 + hello world: 2111 + hello world: 2112 + hello world: 2113 + hello world: 2114 + hello world: 2115 + hello world: 2116 + hello world: 2117 + hello world: 2118 + hello world: 2119 + hello world: 2120 + hello world: 2121 + hello world: 2122 + hello world: 2123 + hello world: 2124 + hello world: 2125 + hello world: 2126 + hello world: 2127 + hello world: 2128 + hello world: 2129 + hello world: 2130 + hello world: 2131 + hello world: 2132 + hello world: 2133 + hello world: 2134 + hello world: 2135 + hello world: 2136 + hello world: 2137 + hello world: 2138 + hello world: 2139 + hello world: 2140 + hello world: 2141 + hello world: 2142 + hello world: 2143 + hello world: 2144 + hello world: 2145 + hello world: 2146 + hello world: 2147 + hello world: 2148 + hello world: 2149 + hello world: 2150 + hello world: 2151 + hello world: 2152 + hello world: 2153 + hello world: 2154 + hello world: 2155 + hello world: 2156 + hello world: 2157 + hello world: 2158 + hello world: 2159 + hello world: 2160 + hello world: 2161 + hello world: 2162 + hello world: 2163 + hello world: 2164 + hello world: 2165 + hello world: 2166 + hello world: 2167 + hello world: 2168 + hello world: 2169 + hello world: 2170 + hello world: 2171 + hello world: 2172 + hello world: 2173 + hello world: 2174 + hello world: 2175 + hello world: 2176 + hello world: 2177 + hello world: 2178 + hello world: 2179 + hello world: 2180 + hello world: 2181 + hello world: 2182 + hello world: 2183 + hello world: 2184 + hello world: 2185 + hello world: 2186 + hello world: 2187 + hello world: 2188 + hello world: 2189 + hello world: 2190 + hello world: 2191 + hello world: 2192 + hello world: 2193 + hello world: 2194 + hello world: 2195 + hello world: 2196 + hello world: 2197 + hello world: 2198 + hello world: 2199 + hello world: 2200 + hello world: 2201 + 
hello world: 2202 + hello world: 2203 + hello world: 2204 + hello world: 2205 + hello world: 2206 + hello world: 2207 + hello world: 2208 + hello world: 2209 + hello world: 2210 + hello world: 2211 + hello world: 2212 + hello world: 2213 + hello world: 2214 + hello world: 2215 + hello world: 2216 + hello world: 2217 + hello world: 2218 + hello world: 2219 + hello world: 2220 + hello world: 2221 + hello world: 2222 + hello world: 2223 + hello world: 2224 + hello world: 2225 + hello world: 2226 + hello world: 2227 + hello world: 2228 + hello world: 2229 + hello world: 2230 + hello world: 2231 + hello world: 2232 + hello world: 2233 + hello world: 2234 + hello world: 2235 + hello world: 2236 + hello world: 2237 + hello world: 2238 + hello world: 2239 + hello world: 2240 + hello world: 2241 + hello world: 2242 + hello world: 2243 + hello world: 2244 + hello world: 2245 + hello world: 2246 + hello world: 2247 + hello world: 2248 + hello world: 2249 + hello world: 2250 + hello world: 2251 + hello world: 2252 + hello world: 2253 + hello world: 2254 + hello world: 2255 + hello world: 2256 + hello world: 2257 + hello world: 2258 + hello world: 2259 + hello world: 2260 + hello world: 2261 + hello world: 2262 + hello world: 2263 + hello world: 2264 + hello world: 2265 + hello world: 2266 + hello world: 2267 + hello world: 2268 + hello world: 2269 + hello world: 2270 + hello world: 2271 + hello world: 2272 + hello world: 2273 + hello world: 2274 + hello world: 2275 + hello world: 2276 + hello world: 2277 + hello world: 2278 + hello world: 2279 + hello world: 2280 + hello world: 2281 + hello world: 2282 + hello world: 2283 + hello world: 2284 + hello world: 2285 + hello world: 2286 + hello world: 2287 + hello world: 2288 + hello world: 2289 + hello world: 2290 + hello world: 2291 + hello world: 2292 + hello world: 2293 + hello world: 2294 + hello world: 2295 + hello world: 2296 + hello world: 2297 + hello world: 2298 + hello world: 2299 + hello world: 2300 + hello world: 2301 + 
hello world: 2302 + hello world: 2303 + hello world: 2304 + hello world: 2305 + hello world: 2306 + hello world: 2307 + hello world: 2308 + hello world: 2309 + hello world: 2310 + hello world: 2311 + hello world: 2312 + hello world: 2313 + hello world: 2314 + hello world: 2315 + hello world: 2316 + hello world: 2317 + hello world: 2318 + hello world: 2319 + hello world: 2320 + hello world: 2321 + hello world: 2322 + hello world: 2323 + hello world: 2324 + hello world: 2325 + hello world: 2326 + hello world: 2327 + hello world: 2328 + hello world: 2329 + hello world: 2330 + hello world: 2331 + hello world: 2332 + hello world: 2333 + hello world: 2334 + hello world: 2335 + hello world: 2336 + hello world: 2337 + hello world: 2338 + hello world: 2339 + hello world: 2340 + hello world: 2341 + hello world: 2342 + hello world: 2343 + hello world: 2344 + hello world: 2345 + hello world: 2346 + hello world: 2347 + hello world: 2348 + hello world: 2349 + hello world: 2350 + hello world: 2351 + hello world: 2352 + hello world: 2353 + hello world: 2354 + hello world: 2355 + hello world: 2356 + hello world: 2357 + hello world: 2358 + hello world: 2359 + hello world: 2360 + hello world: 2361 + hello world: 2362 + hello world: 2363 + hello world: 2364 + hello world: 2365 + hello world: 2366 + hello world: 2367 + hello world: 2368 + hello world: 2369 + hello world: 2370 + hello world: 2371 + hello world: 2372 + hello world: 2373 + hello world: 2374 + hello world: 2375 + hello world: 2376 + hello world: 2377 + hello world: 2378 + hello world: 2379 + hello world: 2380 + hello world: 2381 + hello world: 2382 + hello world: 2383 + hello world: 2384 + hello world: 2385 + hello world: 2386 + hello world: 2387 + hello world: 2388 + hello world: 2389 + hello world: 2390 + hello world: 2391 + hello world: 2392 + hello world: 2393 + hello world: 2394 + hello world: 2395 + hello world: 2396 + hello world: 2397 + hello world: 2398 + hello world: 2399 + hello world: 2400 + hello world: 2401 + 
hello world: 2402 + hello world: 2403 + hello world: 2404 + hello world: 2405 + hello world: 2406 + hello world: 2407 + hello world: 2408 + hello world: 2409 + hello world: 2410 + hello world: 2411 + hello world: 2412 + hello world: 2413 + hello world: 2414 + hello world: 2415 + hello world: 2416 + hello world: 2417 + hello world: 2418 + hello world: 2419 + hello world: 2420 + hello world: 2421 + hello world: 2422 + hello world: 2423 + hello world: 2424 + hello world: 2425 + hello world: 2426 + hello world: 2427 + hello world: 2428 + hello world: 2429 + hello world: 2430 + hello world: 2431 + hello world: 2432 + hello world: 2433 + hello world: 2434 + hello world: 2435 + hello world: 2436 + hello world: 2437 + hello world: 2438 + hello world: 2439 + hello world: 2440 + hello world: 2441 + hello world: 2442 + hello world: 2443 + hello world: 2444 + hello world: 2445 + hello world: 2446 + hello world: 2447 + hello world: 2448 + hello world: 2449 + hello world: 2450 + hello world: 2451 + hello world: 2452 + hello world: 2453 + hello world: 2454 + hello world: 2455 + hello world: 2456 + hello world: 2457 + hello world: 2458 + hello world: 2459 + hello world: 2460 + hello world: 2461 + hello world: 2462 + hello world: 2463 + hello world: 2464 + hello world: 2465 + hello world: 2466 + hello world: 2467 + hello world: 2468 + hello world: 2469 + hello world: 2470 + hello world: 2471 + hello world: 2472 + hello world: 2473 + hello world: 2474 + hello world: 2475 + hello world: 2476 + hello world: 2477 + hello world: 2478 + hello world: 2479 + hello world: 2480 + hello world: 2481 + hello world: 2482 + hello world: 2483 + hello world: 2484 + hello world: 2485 + hello world: 2486 + hello world: 2487 + hello world: 2488 + hello world: 2489 + hello world: 2490 + hello world: 2491 + hello world: 2492 + hello world: 2493 + hello world: 2494 + hello world: 2495 + hello world: 2496 + hello world: 2497 + hello world: 2498 + hello world: 2499 + hello world: 2500 + hello world: 2501 + 
hello world: 2502 + hello world: 2503 + hello world: 2504 + hello world: 2505 + hello world: 2506 + hello world: 2507 + hello world: 2508 + hello world: 2509 + hello world: 2510 + hello world: 2511 + hello world: 2512 + hello world: 2513 + hello world: 2514 + hello world: 2515 + hello world: 2516 + hello world: 2517 + hello world: 2518 + hello world: 2519 + hello world: 2520 + hello world: 2521 + hello world: 2522 + hello world: 2523 + hello world: 2524 + hello world: 2525 + hello world: 2526 + hello world: 2527 + hello world: 2528 + hello world: 2529 + hello world: 2530 + hello world: 2531 + hello world: 2532 + hello world: 2533 + hello world: 2534 + hello world: 2535 + hello world: 2536 + hello world: 2537 + hello world: 2538 + hello world: 2539 + hello world: 2540 + hello world: 2541 + hello world: 2542 + hello world: 2543 + hello world: 2544 + hello world: 2545 + hello world: 2546 + hello world: 2547 + hello world: 2548 + hello world: 2549 + hello world: 2550 + hello world: 2551 + hello world: 2552 + hello world: 2553 + hello world: 2554 + hello world: 2555 + hello world: 2556 + hello world: 2557 + hello world: 2558 + hello world: 2559 + hello world: 2560 + hello world: 2561 + hello world: 2562 + hello world: 2563 + hello world: 2564 + hello world: 2565 + hello world: 2566 + hello world: 2567 + hello world: 2568 + hello world: 2569 + hello world: 2570 + hello world: 2571 + hello world: 2572 + hello world: 2573 + hello world: 2574 + hello world: 2575 + hello world: 2576 + hello world: 2577 + hello world: 2578 + hello world: 2579 + hello world: 2580 + hello world: 2581 + hello world: 2582 + hello world: 2583 + hello world: 2584 + hello world: 2585 + hello world: 2586 + hello world: 2587 + hello world: 2588 + hello world: 2589 + hello world: 2590 + hello world: 2591 + hello world: 2592 + hello world: 2593 + hello world: 2594 + hello world: 2595 + hello world: 2596 + hello world: 2597 + hello world: 2598 + hello world: 2599 + hello world: 2600 + hello world: 2601 + 
hello world: 2602 + hello world: 2603 + hello world: 2604 + hello world: 2605 + hello world: 2606 + hello world: 2607 + hello world: 2608 + hello world: 2609 + hello world: 2610 + hello world: 2611 + hello world: 2612 + hello world: 2613 + hello world: 2614 + hello world: 2615 + hello world: 2616 + hello world: 2617 + hello world: 2618 + hello world: 2619 + hello world: 2620 + hello world: 2621 + hello world: 2622 + hello world: 2623 + hello world: 2624 + hello world: 2625 + hello world: 2626 + hello world: 2627 + hello world: 2628 + hello world: 2629 + hello world: 2630 + hello world: 2631 + hello world: 2632 + hello world: 2633 + hello world: 2634 + hello world: 2635 + hello world: 2636 + hello world: 2637 + hello world: 2638 + hello world: 2639 + hello world: 2640 + hello world: 2641 + hello world: 2642 + hello world: 2643 + hello world: 2644 + hello world: 2645 + hello world: 2646 + hello world: 2647 + hello world: 2648 + hello world: 2649 + hello world: 2650 + hello world: 2651 + hello world: 2652 + hello world: 2653 + hello world: 2654 + hello world: 2655 + hello world: 2656 + hello world: 2657 + hello world: 2658 + hello world: 2659 + hello world: 2660 + hello world: 2661 + hello world: 2662 + hello world: 2663 + hello world: 2664 + hello world: 2665 + hello world: 2666 + hello world: 2667 + hello world: 2668 + hello world: 2669 + hello world: 2670 + hello world: 2671 + hello world: 2672 + hello world: 2673 + hello world: 2674 + hello world: 2675 + hello world: 2676 + hello world: 2677 + hello world: 2678 + hello world: 2679 + hello world: 2680 + hello world: 2681 + hello world: 2682 + hello world: 2683 + hello world: 2684 + hello world: 2685 + hello world: 2686 + hello world: 2687 + hello world: 2688 + hello world: 2689 + hello world: 2690 + hello world: 2691 + hello world: 2692 + hello world: 2693 + hello world: 2694 + hello world: 2695 + hello world: 2696 + hello world: 2697 + hello world: 2698 + hello world: 2699 + hello world: 2700 + hello world: 2701 + 
hello world: 2702 + hello world: 2703 + hello world: 2704 + hello world: 2705 + hello world: 2706 + hello world: 2707 + hello world: 2708 + hello world: 2709 + hello world: 2710 + hello world: 2711 + hello world: 2712 + hello world: 2713 + hello world: 2714 + hello world: 2715 + hello world: 2716 + hello world: 2717 + hello world: 2718 + hello world: 2719 + hello world: 2720 + hello world: 2721 + hello world: 2722 + hello world: 2723 + hello world: 2724 + hello world: 2725 + hello world: 2726 + hello world: 2727 + hello world: 2728 + hello world: 2729 + hello world: 2730 + hello world: 2731 + hello world: 2732 + hello world: 2733 + hello world: 2734 + hello world: 2735 + hello world: 2736 + hello world: 2737 + hello world: 2738 + hello world: 2739 + hello world: 2740 + hello world: 2741 + hello world: 2742 + hello world: 2743 + hello world: 2744 + hello world: 2745 + hello world: 2746 + hello world: 2747 + hello world: 2748 + hello world: 2749 + hello world: 2750 + hello world: 2751 + hello world: 2752 + hello world: 2753 + hello world: 2754 + hello world: 2755 + hello world: 2756 + hello world: 2757 + hello world: 2758 + hello world: 2759 + hello world: 2760 + hello world: 2761 + hello world: 2762 + hello world: 2763 + hello world: 2764 + hello world: 2765 + hello world: 2766 + hello world: 2767 + hello world: 2768 + hello world: 2769 + hello world: 2770 + hello world: 2771 + hello world: 2772 + hello world: 2773 + hello world: 2774 + hello world: 2775 + hello world: 2776 + hello world: 2777 + hello world: 2778 + hello world: 2779 + hello world: 2780 + hello world: 2781 + hello world: 2782 + hello world: 2783 + hello world: 2784 + hello world: 2785 + hello world: 2786 + hello world: 2787 + hello world: 2788 + hello world: 2789 + hello world: 2790 + hello world: 2791 + hello + + + +```python +import os +os.remove('newfile.txt') +``` + +出现异常时候的读写: + + +```python +f = open('newfile.txt','w') +for i in range(3000): + x = 1.0 / (i - 1000) + f.write('hello world: ' + 
str(i) + '\n') +``` + + + --------------------------------------------------------------------------- + + ZeroDivisionError Traceback (most recent call last) + + in () + 1 f = open('newfile.txt','w') + 2 for i in range(3000): + ----> 3 x = 1.0 / (i - 1000) + 4 f.write('hello world: ' + str(i) + '\n') + + + ZeroDivisionError: float division by zero + + +查看已有内容: + + +```python +g = open('newfile.txt', 'r') +print g.read() +f.close() +g.close() +``` + + hello world: 0 + hello world: 1 + hello world: 2 + hello world: 3 + hello world: 4 + hello world: 5 + hello world: 6 + hello world: 7 + hello world: 8 + hello world: 9 + hello world: 10 + hello world: 11 + hello world: 12 + hello world: 13 + hello world: 14 + hello world: 15 + hello world: 16 + hello world: 17 + hello world: 18 + hello world: 19 + hello world: 20 + hello world: 21 + hello world: 22 + hello world: 23 + hello world: 24 + hello world: 25 + hello world: 26 + hello world: 27 + hello world: 28 + hello world: 29 + hello world: 30 + hello world: 31 + hello world: 32 + hello world: 33 + hello world: 34 + hello world: 35 + hello world: 36 + hello world: 37 + hello world: 38 + hello world: 39 + hello world: 40 + hello world: 41 + hello world: 42 + hello world: 43 + hello world: 44 + hello world: 45 + hello world: 46 + hello world: 47 + hello world: 48 + hello world: 49 + hello world: 50 + hello world: 51 + hello world: 52 + hello world: 53 + hello world: 54 + hello world: 55 + hello world: 56 + hello world: 57 + hello world: 58 + hello world: 59 + hello world: 60 + hello world: 61 + hello world: 62 + hello world: 63 + hello world: 64 + hello world: 65 + hello world: 66 + hello world: 67 + hello world: 68 + hello world: 69 + hello world: 70 + hello world: 71 + hello world: 72 + hello world: 73 + hello world: 74 + hello world: 75 + hello world: 76 + hello world: 77 + hello world: 78 + hello world: 79 + hello world: 80 + hello world: 81 + hello world: 82 + hello world: 83 + hello world: 84 + hello world: 85 + hello 
world: 86 + hello world: 87 + hello world: 88 + hello world: 89 + hello world: 90 + hello world: 91 + hello world: 92 + hello world: 93 + hello world: 94 + hello world: 95 + hello world: 96 + hello world: 97 + hello world: 98 + hello world: 99 + hello world: 100 + hello world: 101 + hello world: 102 + hello world: 103 + hello world: 104 + hello world: 105 + hello world: 106 + hello world: 107 + hello world: 108 + hello world: 109 + hello world: 110 + hello world: 111 + hello world: 112 + hello world: 113 + hello world: 114 + hello world: 115 + hello world: 116 + hello world: 117 + hello world: 118 + hello world: 119 + hello world: 120 + hello world: 121 + hello world: 122 + hello world: 123 + hello world: 124 + hello world: 125 + hello world: 126 + hello world: 127 + hello world: 128 + hello world: 129 + hello world: 130 + hello world: 131 + hello world: 132 + hello world: 133 + hello world: 134 + hello world: 135 + hello world: 136 + hello world: 137 + hello world: 138 + hello world: 139 + hello world: 140 + hello world: 141 + hello world: 142 + hello world: 143 + hello world: 144 + hello world: 145 + hello world: 146 + hello world: 147 + hello world: 148 + hello world: 149 + hello world: 150 + hello world: 151 + hello world: 152 + hello world: 153 + hello world: 154 + hello world: 155 + hello world: 156 + hello world: 157 + hello world: 158 + hello world: 159 + hello world: 160 + hello world: 161 + hello world: 162 + hello world: 163 + hello world: 164 + hello world: 165 + hello world: 166 + hello world: 167 + hello world: 168 + hello world: 169 + hello world: 170 + hello world: 171 + hello world: 172 + hello world: 173 + hello world: 174 + hello world: 175 + hello world: 176 + hello world: 177 + hello world: 178 + hello world: 179 + hello world: 180 + hello world: 181 + hello world: 182 + hello world: 183 + hello world: 184 + hello world: 185 + hello world: 186 + hello world: 187 + hello world: 188 + hello world: 189 + hello world: 190 + hello world: 191 + hello 
world: 192 + hello world: 193 + hello world: 194 + hello world: 195 + hello world: 196 + hello world: 197 + hello world: 198 + hello world: 199 + hello world: 200 + hello world: 201 + hello world: 202 + hello world: 203 + hello world: 204 + hello world: 205 + hello world: 206 + hello world: 207 + hello world: 208 + hello world: 209 + hello world: 210 + hello world: 211 + hello world: 212 + hello world: 213 + hello world: 214 + hello world: 215 + hello world: 216 + hello world: 217 + hello world: 218 + hello world: 219 + hello world: 220 + hello world: 221 + hello world: 222 + hello world: 223 + hello world: 224 + hello world: 225 + hello world: 226 + hello world: 227 + hello world: 228 + hello world: 229 + hello world: 230 + hello world: 231 + hello world: 232 + hello world: 233 + hello world: 234 + hello world: 235 + hello world: 236 + hello world: 237 + hello world: 238 + hello world: 239 + hello world: 240 + hello world: 241 + hello world: 242 + hello world: 243 + hello world: 244 + hello world: 245 + hello world: 246 + hello world: 247 + hello world: 248 + hello world: 249 + hello world: 250 + hello world: 251 + hello world: 252 + hello world: 253 + hello world: 254 + hello world: 255 + hello world: 256 + hello world: 257 + hello world: 258 + hello world: 259 + hello world: 260 + hello world: 261 + hello world: 262 + hello world: 263 + hello world: 264 + hello world: 265 + hello world: 266 + hello world: 267 + hello world: 268 + hello world: 269 + hello world: 270 + hello world: 271 + hello world: 272 + hello world: 273 + hello world: 274 + hello world: 275 + hello world: 276 + hello world: 277 + hello world: 278 + hello world: 279 + hello world: 280 + hello world: 281 + hello world: 282 + hello world: 283 + hello world: 284 + hello world: 285 + hello world: 286 + hello world: 287 + hello world: 288 + hello world: 289 + hello world: 290 + hello world: 291 + hello world: 292 + hello world: 293 + hello world: 294 + hello world: 295 + hello world: 296 + hello 
world: 297 + hello world: 298 + hello world: 299 + hello world: 300 + hello world: 301 + hello world: 302 + hello world: 303 + hello world: 304 + hello world: 305 + hello world: 306 + hello world: 307 + hello world: 308 + hello world: 309 + hello world: 310 + hello world: 311 + hello world: 312 + hello world: 313 + hello world: 314 + hello world: 315 + hello world: 316 + hello world: 317 + hello world: 318 + hello world: 319 + hello world: 320 + hello world: 321 + hello world: 322 + hello world: 323 + hello world: 324 + hello world: 325 + hello world: 326 + hello world: 327 + hello world: 328 + hello world: 329 + hello world: 330 + hello world: 331 + hello world: 332 + hello world: 333 + hello world: 334 + hello world: 335 + hello world: 336 + hello world: 337 + hello world: 338 + hello world: 339 + hello world: 340 + hello world: 341 + hello world: 342 + hello world: 343 + hello world: 344 + hello world: 345 + hello world: 346 + hello world: 347 + hello world: 348 + hello world: 349 + hello world: 350 + hello world: 351 + hello world: 352 + hello world: 353 + hello world: 354 + hello world: 355 + hello world: 356 + hello world: 357 + hello world: 358 + hello world: 359 + hello world: 360 + hello world: 361 + hello world: 362 + hello world: 363 + hello world: 364 + hello world: 365 + hello world: 366 + hello world: 367 + hello world: 368 + hello world: 369 + hello world: 370 + hello world: 371 + hello world: 372 + hello world: 373 + hello world: 374 + hello world: 375 + hello world: 376 + hello world: 377 + hello world: 378 + hello world: 379 + hello world: 380 + hello world: 381 + hello world: 382 + hello world: 383 + hello world: 384 + hello world: 385 + hello world: 386 + hello world: 387 + hello world: 388 + hello world: 389 + hello world: 390 + hello world: 391 + hello world: 392 + hello world: 393 + hello world: 394 + hello world: 395 + hello world: 396 + hello world: 397 + hello world: 398 + hello world: 399 + hello world: 400 + hello world: 401 + hello 
world: 402 + hello world: 403 + hello world: 404 + hello world: 405 + hello world: 406 + hello world: 407 + hello world: 408 + hello world: 409 + hello world: 410 + hello world: 411 + hello world: 412 + hello world: 413 + hello world: 414 + hello world: 415 + hello world: 416 + hello world: 417 + hello world: 418 + hello world: 419 + hello world: 420 + hello world: 421 + hello world: 422 + hello world: 423 + hello world: 424 + hello world: 425 + hello world: 426 + hello world: 427 + hello world: 428 + hello world: 429 + hello world: 430 + hello world: 431 + hello world: 432 + hello world: 433 + hello world: 434 + hello world: 435 + hello world: 436 + hello world: 437 + hello world: 438 + hello world: 439 + hello world: 440 + hello world: 441 + hello world: 442 + hello world: 443 + hello world: 444 + hello world: 445 + hello world: 446 + hello world: 447 + hello world: 448 + hello world: 449 + hello world: 450 + hello world: 451 + hello world: 452 + hello world: 453 + hello world: 454 + hello world: 455 + hello world: 456 + hello world: 457 + hello world: 458 + hello world: 459 + hello world: 460 + hello world: 461 + hello world: 462 + hello world: 463 + hello world: 464 + hello world: 465 + hello world: 466 + hello world: 467 + hello world: 468 + hello world: 469 + hello world: 470 + hello world: 471 + hello world: 472 + hello world: 473 + hello world: 474 + hello world: 475 + hello world: 476 + hello world: 477 + hello world: 478 + hello world: 479 + hello world: 480 + hello world: 481 + hello world: 482 + hello world: 483 + hello world: 484 + hello world: 485 + hello world: 486 + hello world: 487 + hello world: 488 + hello world: 489 + hello world: 490 + hello world: 491 + hello world: 492 + hello world: 493 + hello world: 494 + hello world: 495 + hello world: 496 + hello world: 497 + hello world: 498 + hello world: 499 + hello world: 500 + hello world: 501 + hello world: 502 + hello world: 503 + hello world: 504 + hello world: 505 + hello world: 506 + hello 
world: 507 + hello world: 508 + hello world: 509 + hello world: 510 + hello world: 511 + hello world: 512 + hello world: 513 + hello world: 514 + hello world: 515 + hello world: 516 + hello world: 517 + hello world: 518 + hello world: 519 + hello world: 520 + hello world: 521 + hello world: 522 + hello world: 523 + hello world: 524 + hello world: 525 + hello world: 526 + hello world: 527 + hello world: 528 + hello world: 529 + hello world: 530 + hello world: 531 + hello world: 532 + hello world: 533 + hello world: 534 + hello world: 535 + hello world: 536 + hello world: 537 + hello world: 538 + hello world: 539 + hello world: 540 + hello world: 541 + hello world: 542 + hello world: 543 + hello world: 544 + hello world: 545 + hello world: 546 + hello world: 547 + hello world: 548 + hello world: 549 + hello world: 550 + hello world: 551 + hello world: 552 + hello world: 553 + hello world: 554 + hello world: 555 + hello world: 556 + hello world: 557 + hello world: 558 + hello world: 559 + hello world: 560 + hello world: 561 + hello world: 562 + hello world: 563 + hello world: 564 + hello world: 565 + hello world: 566 + hello world: 567 + hello world: 568 + hello world: 569 + hello world: 570 + hello world: 571 + hello world: 572 + hello world: 573 + hello world: 574 + hello world: 575 + hello world: 576 + hello world: 577 + hello world: 578 + hello world: 579 + hello world: 580 + hello world: 581 + hello world: 582 + hello world: 583 + hello world: 584 + hello world: 585 + hello world: 586 + hello world: 587 + hello world: 588 + hello world: 589 + hello world: 590 + hello world: 591 + hello world: 592 + hello world: 593 + hello world: 594 + hello world: 595 + hello world: 596 + hello world: 597 + hello world: 598 + hello world: 599 + hello world: 600 + hello world: 601 + hello world: 602 + hello world: 603 + hello world: 604 + hello world: 605 + hello world: 606 + hello world: 607 + hello world: 608 + hello world: 609 + hello world: 610 + hello world: 611 + hello 
world: 612 + hello world: 613 + hello world: 614 + hello world: 615 + hello world: 616 + hello world: 617 + hello world: 618 + hello world: 619 + hello world: 620 + hello world: 621 + hello world: 622 + hello world: 623 + hello world: 624 + hello world: 625 + hello world: 626 + hello world: 627 + hello world: 628 + hello world: 629 + hello world: 630 + hello world: 631 + hello world: 632 + hello world: 633 + hello world: 634 + hello world: 635 + hello world: 636 + hello world: 637 + hello world: 638 + hello world: 639 + hello world: 640 + hello world: 641 + hello world: 642 + hello world: 643 + hello world: 644 + hello world: 645 + hello world: 646 + hello world: 647 + hello world: 648 + hello world: 649 + hello world: 650 + hello world: 651 + hello world: 652 + hello world: 653 + hello world: 654 + hello world: 655 + hello world: 656 + hello world: 657 + hello world: 658 + hello world: 659 + hello world: 660 + hello world: 661 + hello world: 662 + hello world: 663 + hello world: 664 + hello world: 665 + hello world: 666 + hello world: 667 + hello world: 668 + hello world: 669 + hello world: 670 + hello world: 671 + hello world: 672 + hello world: 673 + hello world: 674 + hello world: 675 + hello world: 676 + hello world: 677 + hello world: 678 + hello world: 679 + hello world: 680 + hello world: 681 + hello world: 682 + hello world: 683 + hello world: 684 + hello world: 685 + hello world: 686 + hello world: 687 + hello world: 688 + hello world: 689 + hello world: 690 + hello world: 691 + hello world: 692 + hello world: 693 + hello world: 694 + hello world: 695 + hello world: 696 + hello world: 697 + hello world: 698 + hello world: 699 + hello world: 700 + hello world: 701 + hello world: 702 + hello world: 703 + hello world: 704 + hello world: 705 + hello world: 706 + hello world: 707 + hello world: 708 + hello world: 709 + hello world: 710 + hello world: 711 + hello world: 712 + hello world: 713 + hello world: 714 + hello world: 715 + hello world: 716 + hello 
world: 717 + hello world: 718 + hello world: 719 + hello world: 720 + hello world: 721 + hello world: 722 + hello world: 723 + hello world: 724 + hello world: 725 + hello world: 726 + hello world: 727 + hello world: 728 + hello world: 729 + hello world: 730 + hello world: 731 + hello world: 732 + hello world: 733 + hello world: 734 + hello world: 735 + hello world: 736 + hello world: 737 + hello world: 738 + hello world: 739 + hello world: 740 + hello world: 741 + hello world: 742 + hello world: 743 + hello world: 744 + hello world: 745 + hello world: 746 + hello world: 747 + hello world: 748 + hello world: 749 + hello world: 750 + hello world: 751 + hello world: 752 + hello world: 753 + hello world: 754 + hello world: 755 + hello world: 756 + hello world: 757 + hello world: 758 + hello world: 759 + hello world: 760 + hello world: 761 + hello world: 762 + hello world: 763 + hello world: 764 + hello world: 765 + hello world: 766 + hello world: 767 + hello world: 768 + hello world: 769 + hello world: 770 + hello world: 771 + hello world: 772 + hello world: 773 + hello world: 774 + hello world: 775 + hello world: 776 + hello world: 777 + hello world: 778 + hello world: 779 + hello world: 780 + hello world: 781 + hello world: 782 + hello world: 783 + hello world: 784 + hello world: 785 + hello world: 786 + hello world: 787 + hello world: 788 + hello world: 789 + hello world: 790 + hello world: 791 + hello world: 792 + hello world: 793 + hello world: 794 + hello world: 795 + hello world: 796 + hello world: 797 + hello world: 798 + hello world: 799 + hello world: 800 + hello world: 801 + hello world: 802 + hello world: 803 + hello world: 804 + hello world: 805 + hello world: 806 + hello world: 807 + hello world: 808 + hello world: 809 + hello world: 810 + hello world: 811 + hello world: 812 + hello world: 813 + hello world: 814 + hello world: 815 + hello world: 816 + hello world: 817 + hello world: 818 + hello world: 819 + hello world: 820 + hello world: 821 + hello 
world: 822 + hello world: 823 + hello world: 824 + hello world: 825 + hello world: 826 + hello world: 827 + hello world: 828 + hello world: 829 + hello world: 830 + hello world: 831 + hello world: 832 + hello world: 833 + hello world: 834 + hello world: 835 + hello world: 836 + hello world: 837 + hello world: 838 + hello world: 839 + hello world: 840 + hello world: 841 + hello world: 842 + hello world: 843 + hello world: 844 + hello world: 845 + hello world: 846 + hello world: 847 + hello world: 848 + hello world: 849 + hello world: 850 + hello world: 851 + hello world: 852 + hello world: 853 + hello world: 854 + hello world: 855 + hello world: 856 + hello world: 857 + hello world: 858 + hello world: 859 + hello world: 860 + hello world: 861 + hello world: 862 + hello world: 863 + hello world: 864 + hello world: 865 + hello world: 866 + hello world: 867 + hello world: 868 + hello world: 869 + hello world: 870 + hello world: 871 + hello world: 872 + hello world: 873 + hello world: 874 + hello world: 875 + hello world: 876 + hello world: 877 + hello world: 878 + hello world: 879 + hello world: 880 + hello world: 881 + hello world: 882 + hello world: 883 + hello world: 884 + hello world: 885 + hello world: 886 + hello world: 887 + hello world: 888 + hello world: 889 + hello world: 890 + hello world: 891 + hello world: 892 + hello world: 893 + hello world: 894 + hello world: 895 + hello world: 896 + hello world: 897 + hello world: 898 + hello world: 899 + hello world: 900 + hello world: 901 + hello world: 902 + hello world: 903 + hello world: 904 + hello world: 905 + hello world: 906 + hello world: 907 + hello world: 908 + hello world: 909 + hello world: 910 + hello world: 911 + hello world: 912 + hello world: 913 + hello world: 914 + hello world: 915 + hello world: 916 + hello world: 917 + hello world: 918 + hello world: 919 + hello world: 920 + hello world: 921 + hello world: 922 + hello world: 923 + hello world: 924 + hello world: 925 + hello world: 926 + hello 
world: 927 + hello world: 928 + hello world: 929 + hello world: 930 + hello world: 931 + hello world: 932 + hello world: 933 + hello world: 934 + hello world: 935 + hello world: 936 + hello world: 937 + hello world: 938 + hello world: 939 + hello world: 940 + hello world: 941 + hello world: 942 + hello world: 943 + hello world: 944 + hello world: 945 + hello world: 946 + hello world: 947 + hello world: 948 + hello world: 949 + hello world: 950 + hello world: 951 + hello world: 952 + hello world: 953 + hello world: 954 + hello world: 955 + hello world: 956 + hello world: 957 + hello world: 958 + hello world: 959 + hello world: 960 + hello world: 961 + hello world: 962 + hello world: 963 + hello world: 964 + hello world: 965 + hello world: 966 + hello world: 967 + hello world: 968 + hello world: 969 + hell + + +可以看到,出现异常的时候,磁盘的写入并没有完成,为此我们可以使用 `try/except/finally` 块来关闭文件,这里 `finally` 确保关闭文件,所有的写入已经完成。 + + +```python +f = open('newfile.txt','w') +try: + for i in range(3000): + x = 1.0 / (i - 1000) + f.write('hello world: ' + str(i) + '\n') +except Exception: + print "something bad happened" +finally: + f.close() +``` + + something bad happened + + + +```python +g = open('newfile.txt', 'r') +print g.read() +g.close() +``` + + hello world: 0 + hello world: 1 + hello world: 2 + hello world: 3 + hello world: 4 + hello world: 5 + hello world: 6 + hello world: 7 + hello world: 8 + hello world: 9 + hello world: 10 + hello world: 11 + hello world: 12 + hello world: 13 + hello world: 14 + hello world: 15 + hello world: 16 + hello world: 17 + hello world: 18 + hello world: 19 + hello world: 20 + hello world: 21 + hello world: 22 + hello world: 23 + hello world: 24 + hello world: 25 + hello world: 26 + hello world: 27 + hello world: 28 + hello world: 29 + hello world: 30 + hello world: 31 + hello world: 32 + hello world: 33 + hello world: 34 + hello world: 35 + hello world: 36 + hello world: 37 + hello world: 38 + hello world: 39 + hello world: 40 + hello world: 41 + hello 
world: 42 + hello world: 43 + hello world: 44 + hello world: 45 + hello world: 46 + hello world: 47 + hello world: 48 + hello world: 49 + hello world: 50 + hello world: 51 + hello world: 52 + hello world: 53 + hello world: 54 + hello world: 55 + hello world: 56 + hello world: 57 + hello world: 58 + hello world: 59 + hello world: 60 + hello world: 61 + hello world: 62 + hello world: 63 + hello world: 64 + hello world: 65 + hello world: 66 + hello world: 67 + hello world: 68 + hello world: 69 + hello world: 70 + hello world: 71 + hello world: 72 + hello world: 73 + hello world: 74 + hello world: 75 + hello world: 76 + hello world: 77 + hello world: 78 + hello world: 79 + hello world: 80 + hello world: 81 + hello world: 82 + hello world: 83 + hello world: 84 + hello world: 85 + hello world: 86 + hello world: 87 + hello world: 88 + hello world: 89 + hello world: 90 + hello world: 91 + hello world: 92 + hello world: 93 + hello world: 94 + hello world: 95 + hello world: 96 + hello world: 97 + hello world: 98 + hello world: 99 + hello world: 100 + hello world: 101 + hello world: 102 + hello world: 103 + hello world: 104 + hello world: 105 + hello world: 106 + hello world: 107 + hello world: 108 + hello world: 109 + hello world: 110 + hello world: 111 + hello world: 112 + hello world: 113 + hello world: 114 + hello world: 115 + hello world: 116 + hello world: 117 + hello world: 118 + hello world: 119 + hello world: 120 + hello world: 121 + hello world: 122 + hello world: 123 + hello world: 124 + hello world: 125 + hello world: 126 + hello world: 127 + hello world: 128 + hello world: 129 + hello world: 130 + hello world: 131 + hello world: 132 + hello world: 133 + hello world: 134 + hello world: 135 + hello world: 136 + hello world: 137 + hello world: 138 + hello world: 139 + hello world: 140 + hello world: 141 + hello world: 142 + hello world: 143 + hello world: 144 + hello world: 145 + hello world: 146 + hello world: 147 + hello world: 148 + hello world: 149 + hello 
world: 150 + hello world: 151 + hello world: 152 + hello world: 153 + hello world: 154 + hello world: 155 + hello world: 156 + hello world: 157 + hello world: 158 + hello world: 159 + hello world: 160 + hello world: 161 + hello world: 162 + hello world: 163 + hello world: 164 + hello world: 165 + hello world: 166 + hello world: 167 + hello world: 168 + hello world: 169 + hello world: 170 + hello world: 171 + hello world: 172 + hello world: 173 + hello world: 174 + hello world: 175 + hello world: 176 + hello world: 177 + hello world: 178 + hello world: 179 + hello world: 180 + hello world: 181 + hello world: 182 + hello world: 183 + hello world: 184 + hello world: 185 + hello world: 186 + hello world: 187 + hello world: 188 + hello world: 189 + hello world: 190 + hello world: 191 + hello world: 192 + hello world: 193 + hello world: 194 + hello world: 195 + hello world: 196 + hello world: 197 + hello world: 198 + hello world: 199 + hello world: 200 + hello world: 201 + hello world: 202 + hello world: 203 + hello world: 204 + hello world: 205 + hello world: 206 + hello world: 207 + hello world: 208 + hello world: 209 + hello world: 210 + hello world: 211 + hello world: 212 + hello world: 213 + hello world: 214 + hello world: 215 + hello world: 216 + hello world: 217 + hello world: 218 + hello world: 219 + hello world: 220 + hello world: 221 + hello world: 222 + hello world: 223 + hello world: 224 + hello world: 225 + hello world: 226 + hello world: 227 + hello world: 228 + hello world: 229 + hello world: 230 + hello world: 231 + hello world: 232 + hello world: 233 + hello world: 234 + hello world: 235 + hello world: 236 + hello world: 237 + hello world: 238 + hello world: 239 + hello world: 240 + hello world: 241 + hello world: 242 + hello world: 243 + hello world: 244 + hello world: 245 + hello world: 246 + hello world: 247 + hello world: 248 + hello world: 249 + hello world: 250 + hello world: 251 + hello world: 252 + hello world: 253 + hello world: 254 + hello 
world: 255 + hello world: 256 + hello world: 257 + hello world: 258 + hello world: 259 + hello world: 260 + hello world: 261 + hello world: 262 + hello world: 263 + hello world: 264 + hello world: 265 + hello world: 266 + hello world: 267 + hello world: 268 + hello world: 269 + hello world: 270 + hello world: 271 + hello world: 272 + hello world: 273 + hello world: 274 + hello world: 275 + hello world: 276 + hello world: 277 + hello world: 278 + hello world: 279 + hello world: 280 + hello world: 281 + hello world: 282 + hello world: 283 + hello world: 284 + hello world: 285 + hello world: 286 + hello world: 287 + hello world: 288 + hello world: 289 + hello world: 290 + hello world: 291 + hello world: 292 + hello world: 293 + hello world: 294 + hello world: 295 + hello world: 296 + hello world: 297 + hello world: 298 + hello world: 299 + hello world: 300 + hello world: 301 + hello world: 302 + hello world: 303 + hello world: 304 + hello world: 305 + hello world: 306 + hello world: 307 + hello world: 308 + hello world: 309 + hello world: 310 + hello world: 311 + hello world: 312 + hello world: 313 + hello world: 314 + hello world: 315 + hello world: 316 + hello world: 317 + hello world: 318 + hello world: 319 + hello world: 320 + hello world: 321 + hello world: 322 + hello world: 323 + hello world: 324 + hello world: 325 + hello world: 326 + hello world: 327 + hello world: 328 + hello world: 329 + hello world: 330 + hello world: 331 + hello world: 332 + hello world: 333 + hello world: 334 + hello world: 335 + hello world: 336 + hello world: 337 + hello world: 338 + hello world: 339 + hello world: 340 + hello world: 341 + hello world: 342 + hello world: 343 + hello world: 344 + hello world: 345 + hello world: 346 + hello world: 347 + hello world: 348 + hello world: 349 + hello world: 350 + hello world: 351 + hello world: 352 + hello world: 353 + hello world: 354 + hello world: 355 + hello world: 356 + hello world: 357 + hello world: 358 + hello world: 359 + hello 
world: 360 + hello world: 361 + hello world: 362 + hello world: 363 + hello world: 364 + hello world: 365 + hello world: 366 + hello world: 367 + hello world: 368 + hello world: 369 + hello world: 370 + hello world: 371 + hello world: 372 + hello world: 373 + hello world: 374 + hello world: 375 + hello world: 376 + hello world: 377 + hello world: 378 + hello world: 379 + hello world: 380 + hello world: 381 + hello world: 382 + hello world: 383 + hello world: 384 + hello world: 385 + hello world: 386 + hello world: 387 + hello world: 388 + hello world: 389 + hello world: 390 + hello world: 391 + hello world: 392 + hello world: 393 + hello world: 394 + hello world: 395 + hello world: 396 + hello world: 397 + hello world: 398 + hello world: 399 + hello world: 400 + hello world: 401 + hello world: 402 + hello world: 403 + hello world: 404 + hello world: 405 + hello world: 406 + hello world: 407 + hello world: 408 + hello world: 409 + hello world: 410 + hello world: 411 + hello world: 412 + hello world: 413 + hello world: 414 + hello world: 415 + hello world: 416 + hello world: 417 + hello world: 418 + hello world: 419 + hello world: 420 + hello world: 421 + hello world: 422 + hello world: 423 + hello world: 424 + hello world: 425 + hello world: 426 + hello world: 427 + hello world: 428 + hello world: 429 + hello world: 430 + hello world: 431 + hello world: 432 + hello world: 433 + hello world: 434 + hello world: 435 + hello world: 436 + hello world: 437 + hello world: 438 + hello world: 439 + hello world: 440 + hello world: 441 + hello world: 442 + hello world: 443 + hello world: 444 + hello world: 445 + hello world: 446 + hello world: 447 + hello world: 448 + hello world: 449 + hello world: 450 + hello world: 451 + hello world: 452 + hello world: 453 + hello world: 454 + hello world: 455 + hello world: 456 + hello world: 457 + hello world: 458 + hello world: 459 + hello world: 460 + hello world: 461 + hello world: 462 + hello world: 463 + hello world: 464 + hello 
world: 465 + hello world: 466 + hello world: 467 + hello world: 468 + hello world: 469 + hello world: 470 + hello world: 471 + hello world: 472 + hello world: 473 + hello world: 474 + hello world: 475 + hello world: 476 + hello world: 477 + hello world: 478 + hello world: 479 + hello world: 480 + hello world: 481 + hello world: 482 + hello world: 483 + hello world: 484 + hello world: 485 + hello world: 486 + hello world: 487 + hello world: 488 + hello world: 489 + hello world: 490 + hello world: 491 + hello world: 492 + hello world: 493 + hello world: 494 + hello world: 495 + hello world: 496 + hello world: 497 + hello world: 498 + hello world: 499 + hello world: 500 + hello world: 501 + hello world: 502 + hello world: 503 + hello world: 504 + hello world: 505 + hello world: 506 + hello world: 507 + hello world: 508 + hello world: 509 + hello world: 510 + hello world: 511 + hello world: 512 + hello world: 513 + hello world: 514 + hello world: 515 + hello world: 516 + hello world: 517 + hello world: 518 + hello world: 519 + hello world: 520 + hello world: 521 + hello world: 522 + hello world: 523 + hello world: 524 + hello world: 525 + hello world: 526 + hello world: 527 + hello world: 528 + hello world: 529 + hello world: 530 + hello world: 531 + hello world: 532 + hello world: 533 + hello world: 534 + hello world: 535 + hello world: 536 + hello world: 537 + hello world: 538 + hello world: 539 + hello world: 540 + hello world: 541 + hello world: 542 + hello world: 543 + hello world: 544 + hello world: 545 + hello world: 546 + hello world: 547 + hello world: 548 + hello world: 549 + hello world: 550 + hello world: 551 + hello world: 552 + hello world: 553 + hello world: 554 + hello world: 555 + hello world: 556 + hello world: 557 + hello world: 558 + hello world: 559 + hello world: 560 + hello world: 561 + hello world: 562 + hello world: 563 + hello world: 564 + hello world: 565 + hello world: 566 + hello world: 567 + hello world: 568 + hello world: 569 + hello 
world: 570 + hello world: 571 + hello world: 572 + hello world: 573 + hello world: 574 + hello world: 575 + hello world: 576 + hello world: 577 + hello world: 578 + hello world: 579 + hello world: 580 + hello world: 581 + hello world: 582 + hello world: 583 + hello world: 584 + hello world: 585 + hello world: 586 + hello world: 587 + hello world: 588 + hello world: 589 + hello world: 590 + hello world: 591 + hello world: 592 + hello world: 593 + hello world: 594 + hello world: 595 + hello world: 596 + hello world: 597 + hello world: 598 + hello world: 599 + hello world: 600 + hello world: 601 + hello world: 602 + hello world: 603 + hello world: 604 + hello world: 605 + hello world: 606 + hello world: 607 + hello world: 608 + hello world: 609 + hello world: 610 + hello world: 611 + hello world: 612 + hello world: 613 + hello world: 614 + hello world: 615 + hello world: 616 + hello world: 617 + hello world: 618 + hello world: 619 + hello world: 620 + hello world: 621 + hello world: 622 + hello world: 623 + hello world: 624 + hello world: 625 + hello world: 626 + hello world: 627 + hello world: 628 + hello world: 629 + hello world: 630 + hello world: 631 + hello world: 632 + hello world: 633 + hello world: 634 + hello world: 635 + hello world: 636 + hello world: 637 + hello world: 638 + hello world: 639 + hello world: 640 + hello world: 641 + hello world: 642 + hello world: 643 + hello world: 644 + hello world: 645 + hello world: 646 + hello world: 647 + hello world: 648 + hello world: 649 + hello world: 650 + hello world: 651 + hello world: 652 + hello world: 653 + hello world: 654 + hello world: 655 + hello world: 656 + hello world: 657 + hello world: 658 + hello world: 659 + hello world: 660 + hello world: 661 + hello world: 662 + hello world: 663 + hello world: 664 + hello world: 665 + hello world: 666 + hello world: 667 + hello world: 668 + hello world: 669 + hello world: 670 + hello world: 671 + hello world: 672 + hello world: 673 + hello world: 674 + hello 
world: 675 + hello world: 676 + hello world: 677 + hello world: 678 + hello world: 679 + hello world: 680 + hello world: 681 + hello world: 682 + hello world: 683 + hello world: 684 + hello world: 685 + hello world: 686 + hello world: 687 + hello world: 688 + hello world: 689 + hello world: 690 + hello world: 691 + hello world: 692 + hello world: 693 + hello world: 694 + hello world: 695 + hello world: 696 + hello world: 697 + hello world: 698 + hello world: 699 + hello world: 700 + hello world: 701 + hello world: 702 + hello world: 703 + hello world: 704 + hello world: 705 + hello world: 706 + hello world: 707 + hello world: 708 + hello world: 709 + hello world: 710 + hello world: 711 + hello world: 712 + hello world: 713 + hello world: 714 + hello world: 715 + hello world: 716 + hello world: 717 + hello world: 718 + hello world: 719 + hello world: 720 + hello world: 721 + hello world: 722 + hello world: 723 + hello world: 724 + hello world: 725 + hello world: 726 + hello world: 727 + hello world: 728 + hello world: 729 + hello world: 730 + hello world: 731 + hello world: 732 + hello world: 733 + hello world: 734 + hello world: 735 + hello world: 736 + hello world: 737 + hello world: 738 + hello world: 739 + hello world: 740 + hello world: 741 + hello world: 742 + hello world: 743 + hello world: 744 + hello world: 745 + hello world: 746 + hello world: 747 + hello world: 748 + hello world: 749 + hello world: 750 + hello world: 751 + hello world: 752 + hello world: 753 + hello world: 754 + hello world: 755 + hello world: 756 + hello world: 757 + hello world: 758 + hello world: 759 + hello world: 760 + hello world: 761 + hello world: 762 + hello world: 763 + hello world: 764 + hello world: 765 + hello world: 766 + hello world: 767 + hello world: 768 + hello world: 769 + hello world: 770 + hello world: 771 + hello world: 772 + hello world: 773 + hello world: 774 + hello world: 775 + hello world: 776 + hello world: 777 + hello world: 778 + hello world: 779 + hello 
world: 780 + hello world: 781 + hello world: 782 + hello world: 783 + hello world: 784 + hello world: 785 + hello world: 786 + hello world: 787 + hello world: 788 + hello world: 789 + hello world: 790 + hello world: 791 + hello world: 792 + hello world: 793 + hello world: 794 + hello world: 795 + hello world: 796 + hello world: 797 + hello world: 798 + hello world: 799 + hello world: 800 + hello world: 801 + hello world: 802 + hello world: 803 + hello world: 804 + hello world: 805 + hello world: 806 + hello world: 807 + hello world: 808 + hello world: 809 + hello world: 810 + hello world: 811 + hello world: 812 + hello world: 813 + hello world: 814 + hello world: 815 + hello world: 816 + hello world: 817 + hello world: 818 + hello world: 819 + hello world: 820 + hello world: 821 + hello world: 822 + hello world: 823 + hello world: 824 + hello world: 825 + hello world: 826 + hello world: 827 + hello world: 828 + hello world: 829 + hello world: 830 + hello world: 831 + hello world: 832 + hello world: 833 + hello world: 834 + hello world: 835 + hello world: 836 + hello world: 837 + hello world: 838 + hello world: 839 + hello world: 840 + hello world: 841 + hello world: 842 + hello world: 843 + hello world: 844 + hello world: 845 + hello world: 846 + hello world: 847 + hello world: 848 + hello world: 849 + hello world: 850 + hello world: 851 + hello world: 852 + hello world: 853 + hello world: 854 + hello world: 855 + hello world: 856 + hello world: 857 + hello world: 858 + hello world: 859 + hello world: 860 + hello world: 861 + hello world: 862 + hello world: 863 + hello world: 864 + hello world: 865 + hello world: 866 + hello world: 867 + hello world: 868 + hello world: 869 + hello world: 870 + hello world: 871 + hello world: 872 + hello world: 873 + hello world: 874 + hello world: 875 + hello world: 876 + hello world: 877 + hello world: 878 + hello world: 879 + hello world: 880 + hello world: 881 + hello world: 882 + hello world: 883 + hello world: 884 + hello 
world: 885 + hello world: 886 + hello world: 887 + hello world: 888 + hello world: 889 + hello world: 890 + hello world: 891 + hello world: 892 + hello world: 893 + hello world: 894 + hello world: 895 + hello world: 896 + hello world: 897 + hello world: 898 + hello world: 899 + hello world: 900 + hello world: 901 + hello world: 902 + hello world: 903 + hello world: 904 + hello world: 905 + hello world: 906 + hello world: 907 + hello world: 908 + hello world: 909 + hello world: 910 + hello world: 911 + hello world: 912 + hello world: 913 + hello world: 914 + hello world: 915 + hello world: 916 + hello world: 917 + hello world: 918 + hello world: 919 + hello world: 920 + hello world: 921 + hello world: 922 + hello world: 923 + hello world: 924 + hello world: 925 + hello world: 926 + hello world: 927 + hello world: 928 + hello world: 929 + hello world: 930 + hello world: 931 + hello world: 932 + hello world: 933 + hello world: 934 + hello world: 935 + hello world: 936 + hello world: 937 + hello world: 938 + hello world: 939 + hello world: 940 + hello world: 941 + hello world: 942 + hello world: 943 + hello world: 944 + hello world: 945 + hello world: 946 + hello world: 947 + hello world: 948 + hello world: 949 + hello world: 950 + hello world: 951 + hello world: 952 + hello world: 953 + hello world: 954 + hello world: 955 + hello world: 956 + hello world: 957 + hello world: 958 + hello world: 959 + hello world: 960 + hello world: 961 + hello world: 962 + hello world: 963 + hello world: 964 + hello world: 965 + hello world: 966 + hello world: 967 + hello world: 968 + hello world: 969 + hello world: 970 + hello world: 971 + hello world: 972 + hello world: 973 + hello world: 974 + hello world: 975 + hello world: 976 + hello world: 977 + hello world: 978 + hello world: 979 + hello world: 980 + hello world: 981 + hello world: 982 + hello world: 983 + hello world: 984 + hello world: 985 + hello world: 986 + hello world: 987 + hello world: 988 + hello world: 989 + hello 
world: 990 + hello world: 991 + hello world: 992 + hello world: 993 + hello world: 994 + hello world: 995 + hello world: 996 + hello world: 997 + hello world: 998 + hello world: 999 + + + +## with 方法 + +事实上,**Python**提供了更安全的方法,当 `with` 块的内容结束后,**Python**会自动调用它的 `close` 方法,确保读写的安全: + + +```python +with open('newfile.txt','w') as f: + for i in range(3000): + x = 1.0 / (i - 1000) + f.write('hello world: ' + str(i) + '\n') +``` + + + --------------------------------------------------------------------------- + + ZeroDivisionError Traceback (most recent call last) + + in () + 1 with open('newfile.txt','w') as f: + 2 for i in range(3000): + ----> 3 x = 1.0 / (i - 1000) + 4 f.write('hello world: ' + str(i) + '\n') + + + ZeroDivisionError: float division by zero + + +与 `try/except/finally` 效果相同,但更简单。 + + +```python +g = open('newfile.txt', 'r') +print g.read() +g.close() +``` + + hello world: 0 + hello world: 1 + hello world: 2 + hello world: 3 + hello world: 4 + hello world: 5 + hello world: 6 + hello world: 7 + hello world: 8 + hello world: 9 + hello world: 10 + hello world: 11 + hello world: 12 + hello world: 13 + hello world: 14 + hello world: 15 + hello world: 16 + hello world: 17 + hello world: 18 + hello world: 19 + hello world: 20 + hello world: 21 + hello world: 22 + hello world: 23 + hello world: 24 + hello world: 25 + hello world: 26 + hello world: 27 + hello world: 28 + hello world: 29 + hello world: 30 + hello world: 31 + hello world: 32 + hello world: 33 + hello world: 34 + hello world: 35 + hello world: 36 + hello world: 37 + hello world: 38 + hello world: 39 + hello world: 40 + hello world: 41 + hello world: 42 + hello world: 43 + hello world: 44 + hello world: 45 + hello world: 46 + hello world: 47 + hello world: 48 + hello world: 49 + hello world: 50 + hello world: 51 + hello world: 52 + hello world: 53 + hello world: 54 + hello world: 55 + hello world: 56 + hello world: 57 + hello world: 58 + hello world: 59 + hello world: 60 + hello world: 61 + hello
world: 62 + hello world: 63 + hello world: 64 + hello world: 65 + hello world: 66 + hello world: 67 + hello world: 68 + hello world: 69 + hello world: 70 + hello world: 71 + hello world: 72 + hello world: 73 + hello world: 74 + hello world: 75 + hello world: 76 + hello world: 77 + hello world: 78 + hello world: 79 + hello world: 80 + hello world: 81 + hello world: 82 + hello world: 83 + hello world: 84 + hello world: 85 + hello world: 86 + hello world: 87 + hello world: 88 + hello world: 89 + hello world: 90 + hello world: 91 + hello world: 92 + hello world: 93 + hello world: 94 + hello world: 95 + hello world: 96 + hello world: 97 + hello world: 98 + hello world: 99 + hello world: 100 + hello world: 101 + hello world: 102 + hello world: 103 + hello world: 104 + hello world: 105 + hello world: 106 + hello world: 107 + hello world: 108 + hello world: 109 + hello world: 110 + hello world: 111 + hello world: 112 + hello world: 113 + hello world: 114 + hello world: 115 + hello world: 116 + hello world: 117 + hello world: 118 + hello world: 119 + hello world: 120 + hello world: 121 + hello world: 122 + hello world: 123 + hello world: 124 + hello world: 125 + hello world: 126 + hello world: 127 + hello world: 128 + hello world: 129 + hello world: 130 + hello world: 131 + hello world: 132 + hello world: 133 + hello world: 134 + hello world: 135 + hello world: 136 + hello world: 137 + hello world: 138 + hello world: 139 + hello world: 140 + hello world: 141 + hello world: 142 + hello world: 143 + hello world: 144 + hello world: 145 + hello world: 146 + hello world: 147 + hello world: 148 + hello world: 149 + hello world: 150 + hello world: 151 + hello world: 152 + hello world: 153 + hello world: 154 + hello world: 155 + hello world: 156 + hello world: 157 + hello world: 158 + hello world: 159 + hello world: 160 + hello world: 161 + hello world: 162 + hello world: 163 + hello world: 164 + hello world: 165 + hello world: 166 + hello world: 167 + hello world: 168 + hello 
world: 169 + hello world: 170 + hello world: 171 + hello world: 172 + hello world: 173 + hello world: 174 + hello world: 175 + hello world: 176 + hello world: 177 + hello world: 178 + hello world: 179 + hello world: 180 + hello world: 181 + hello world: 182 + hello world: 183 + hello world: 184 + hello world: 185 + hello world: 186 + hello world: 187 + hello world: 188 + hello world: 189 + hello world: 190 + hello world: 191 + hello world: 192 + hello world: 193 + hello world: 194 + hello world: 195 + hello world: 196 + hello world: 197 + hello world: 198 + hello world: 199 + hello world: 200 + hello world: 201 + hello world: 202 + hello world: 203 + hello world: 204 + hello world: 205 + hello world: 206 + hello world: 207 + hello world: 208 + hello world: 209 + hello world: 210 + hello world: 211 + hello world: 212 + hello world: 213 + hello world: 214 + hello world: 215 + hello world: 216 + hello world: 217 + hello world: 218 + hello world: 219 + hello world: 220 + hello world: 221 + hello world: 222 + hello world: 223 + hello world: 224 + hello world: 225 + hello world: 226 + hello world: 227 + hello world: 228 + hello world: 229 + hello world: 230 + hello world: 231 + hello world: 232 + hello world: 233 + hello world: 234 + hello world: 235 + hello world: 236 + hello world: 237 + hello world: 238 + hello world: 239 + hello world: 240 + hello world: 241 + hello world: 242 + hello world: 243 + hello world: 244 + hello world: 245 + hello world: 246 + hello world: 247 + hello world: 248 + hello world: 249 + hello world: 250 + hello world: 251 + hello world: 252 + hello world: 253 + hello world: 254 + hello world: 255 + hello world: 256 + hello world: 257 + hello world: 258 + hello world: 259 + hello world: 260 + hello world: 261 + hello world: 262 + hello world: 263 + hello world: 264 + hello world: 265 + hello world: 266 + hello world: 267 + hello world: 268 + hello world: 269 + hello world: 270 + hello world: 271 + hello world: 272 + hello world: 273 + hello 
world: 274 + hello world: 275 + hello world: 276 + hello world: 277 + hello world: 278 + hello world: 279 + hello world: 280 + hello world: 281 + hello world: 282 + hello world: 283 + hello world: 284 + hello world: 285 + hello world: 286 + hello world: 287 + hello world: 288 + hello world: 289 + hello world: 290 + hello world: 291 + hello world: 292 + hello world: 293 + hello world: 294 + hello world: 295 + hello world: 296 + hello world: 297 + hello world: 298 + hello world: 299 + hello world: 300 + hello world: 301 + hello world: 302 + hello world: 303 + hello world: 304 + hello world: 305 + hello world: 306 + hello world: 307 + hello world: 308 + hello world: 309 + hello world: 310 + hello world: 311 + hello world: 312 + hello world: 313 + hello world: 314 + hello world: 315 + hello world: 316 + hello world: 317 + hello world: 318 + hello world: 319 + hello world: 320 + hello world: 321 + hello world: 322 + hello world: 323 + hello world: 324 + hello world: 325 + hello world: 326 + hello world: 327 + hello world: 328 + hello world: 329 + hello world: 330 + hello world: 331 + hello world: 332 + hello world: 333 + hello world: 334 + hello world: 335 + hello world: 336 + hello world: 337 + hello world: 338 + hello world: 339 + hello world: 340 + hello world: 341 + hello world: 342 + hello world: 343 + hello world: 344 + hello world: 345 + hello world: 346 + hello world: 347 + hello world: 348 + hello world: 349 + hello world: 350 + hello world: 351 + hello world: 352 + hello world: 353 + hello world: 354 + hello world: 355 + hello world: 356 + hello world: 357 + hello world: 358 + hello world: 359 + hello world: 360 + hello world: 361 + hello world: 362 + hello world: 363 + hello world: 364 + hello world: 365 + hello world: 366 + hello world: 367 + hello world: 368 + hello world: 369 + hello world: 370 + hello world: 371 + hello world: 372 + hello world: 373 + hello world: 374 + hello world: 375 + hello world: 376 + hello world: 377 + hello world: 378 + hello 
world: 379 + hello world: 380 + hello world: 381 + hello world: 382 + hello world: 383 + hello world: 384 + hello world: 385 + hello world: 386 + hello world: 387 + hello world: 388 + hello world: 389 + hello world: 390 + hello world: 391 + hello world: 392 + hello world: 393 + hello world: 394 + hello world: 395 + hello world: 396 + hello world: 397 + hello world: 398 + hello world: 399 + hello world: 400 + hello world: 401 + hello world: 402 + hello world: 403 + hello world: 404 + hello world: 405 + hello world: 406 + hello world: 407 + hello world: 408 + hello world: 409 + hello world: 410 + hello world: 411 + hello world: 412 + hello world: 413 + hello world: 414 + hello world: 415 + hello world: 416 + hello world: 417 + hello world: 418 + hello world: 419 + hello world: 420 + hello world: 421 + hello world: 422 + hello world: 423 + hello world: 424 + hello world: 425 + hello world: 426 + hello world: 427 + hello world: 428 + hello world: 429 + hello world: 430 + hello world: 431 + hello world: 432 + hello world: 433 + hello world: 434 + hello world: 435 + hello world: 436 + hello world: 437 + hello world: 438 + hello world: 439 + hello world: 440 + hello world: 441 + hello world: 442 + hello world: 443 + hello world: 444 + hello world: 445 + hello world: 446 + hello world: 447 + hello world: 448 + hello world: 449 + hello world: 450 + hello world: 451 + hello world: 452 + hello world: 453 + hello world: 454 + hello world: 455 + hello world: 456 + hello world: 457 + hello world: 458 + hello world: 459 + hello world: 460 + hello world: 461 + hello world: 462 + hello world: 463 + hello world: 464 + hello world: 465 + hello world: 466 + hello world: 467 + hello world: 468 + hello world: 469 + hello world: 470 + hello world: 471 + hello world: 472 + hello world: 473 + hello world: 474 + hello world: 475 + hello world: 476 + hello world: 477 + hello world: 478 + hello world: 479 + hello world: 480 + hello world: 481 + hello world: 482 + hello world: 483 + hello 
world: 484 + hello world: 485 + hello world: 486 + hello world: 487 + hello world: 488 + hello world: 489 + hello world: 490 + hello world: 491 + hello world: 492 + hello world: 493 + hello world: 494 + hello world: 495 + hello world: 496 + hello world: 497 + hello world: 498 + hello world: 499 + hello world: 500 + hello world: 501 + hello world: 502 + hello world: 503 + hello world: 504 + hello world: 505 + hello world: 506 + hello world: 507 + hello world: 508 + hello world: 509 + hello world: 510 + hello world: 511 + hello world: 512 + hello world: 513 + hello world: 514 + hello world: 515 + hello world: 516 + hello world: 517 + hello world: 518 + hello world: 519 + hello world: 520 + hello world: 521 + hello world: 522 + hello world: 523 + hello world: 524 + hello world: 525 + hello world: 526 + hello world: 527 + hello world: 528 + hello world: 529 + hello world: 530 + hello world: 531 + hello world: 532 + hello world: 533 + hello world: 534 + hello world: 535 + hello world: 536 + hello world: 537 + hello world: 538 + hello world: 539 + hello world: 540 + hello world: 541 + hello world: 542 + hello world: 543 + hello world: 544 + hello world: 545 + hello world: 546 + hello world: 547 + hello world: 548 + hello world: 549 + hello world: 550 + hello world: 551 + hello world: 552 + hello world: 553 + hello world: 554 + hello world: 555 + hello world: 556 + hello world: 557 + hello world: 558 + hello world: 559 + hello world: 560 + hello world: 561 + hello world: 562 + hello world: 563 + hello world: 564 + hello world: 565 + hello world: 566 + hello world: 567 + hello world: 568 + hello world: 569 + hello world: 570 + hello world: 571 + hello world: 572 + hello world: 573 + hello world: 574 + hello world: 575 + hello world: 576 + hello world: 577 + hello world: 578 + hello world: 579 + hello world: 580 + hello world: 581 + hello world: 582 + hello world: 583 + hello world: 584 + hello world: 585 + hello world: 586 + hello world: 587 + hello world: 588 + hello 
world: 589 + hello world: 590 + hello world: 591 + hello world: 592 + hello world: 593 + hello world: 594 + hello world: 595 + hello world: 596 + hello world: 597 + hello world: 598 + hello world: 599 + hello world: 600 + hello world: 601 + hello world: 602 + hello world: 603 + hello world: 604 + hello world: 605 + hello world: 606 + hello world: 607 + hello world: 608 + hello world: 609 + hello world: 610 + hello world: 611 + hello world: 612 + hello world: 613 + hello world: 614 + hello world: 615 + hello world: 616 + hello world: 617 + hello world: 618 + hello world: 619 + hello world: 620 + hello world: 621 + hello world: 622 + hello world: 623 + hello world: 624 + hello world: 625 + hello world: 626 + hello world: 627 + hello world: 628 + hello world: 629 + hello world: 630 + hello world: 631 + hello world: 632 + hello world: 633 + hello world: 634 + hello world: 635 + hello world: 636 + hello world: 637 + hello world: 638 + hello world: 639 + hello world: 640 + hello world: 641 + hello world: 642 + hello world: 643 + hello world: 644 + hello world: 645 + hello world: 646 + hello world: 647 + hello world: 648 + hello world: 649 + hello world: 650 + hello world: 651 + hello world: 652 + hello world: 653 + hello world: 654 + hello world: 655 + hello world: 656 + hello world: 657 + hello world: 658 + hello world: 659 + hello world: 660 + hello world: 661 + hello world: 662 + hello world: 663 + hello world: 664 + hello world: 665 + hello world: 666 + hello world: 667 + hello world: 668 + hello world: 669 + hello world: 670 + hello world: 671 + hello world: 672 + hello world: 673 + hello world: 674 + hello world: 675 + hello world: 676 + hello world: 677 + hello world: 678 + hello world: 679 + hello world: 680 + hello world: 681 + hello world: 682 + hello world: 683 + hello world: 684 + hello world: 685 + hello world: 686 + hello world: 687 + hello world: 688 + hello world: 689 + hello world: 690 + hello world: 691 + hello world: 692 + hello world: 693 + hello 
world: 694 + hello world: 695 + hello world: 696 + hello world: 697 + hello world: 698 + hello world: 699 + hello world: 700 + hello world: 701 + hello world: 702 + hello world: 703 + hello world: 704 + hello world: 705 + hello world: 706 + hello world: 707 + hello world: 708 + hello world: 709 + hello world: 710 + hello world: 711 + hello world: 712 + hello world: 713 + hello world: 714 + hello world: 715 + hello world: 716 + hello world: 717 + hello world: 718 + hello world: 719 + hello world: 720 + hello world: 721 + hello world: 722 + hello world: 723 + hello world: 724 + hello world: 725 + hello world: 726 + hello world: 727 + hello world: 728 + hello world: 729 + hello world: 730 + hello world: 731 + hello world: 732 + hello world: 733 + hello world: 734 + hello world: 735 + hello world: 736 + hello world: 737 + hello world: 738 + hello world: 739 + hello world: 740 + hello world: 741 + hello world: 742 + hello world: 743 + hello world: 744 + hello world: 745 + hello world: 746 + hello world: 747 + hello world: 748 + hello world: 749 + hello world: 750 + hello world: 751 + hello world: 752 + hello world: 753 + hello world: 754 + hello world: 755 + hello world: 756 + hello world: 757 + hello world: 758 + hello world: 759 + hello world: 760 + hello world: 761 + hello world: 762 + hello world: 763 + hello world: 764 + hello world: 765 + hello world: 766 + hello world: 767 + hello world: 768 + hello world: 769 + hello world: 770 + hello world: 771 + hello world: 772 + hello world: 773 + hello world: 774 + hello world: 775 + hello world: 776 + hello world: 777 + hello world: 778 + hello world: 779 + hello world: 780 + hello world: 781 + hello world: 782 + hello world: 783 + hello world: 784 + hello world: 785 + hello world: 786 + hello world: 787 + hello world: 788 + hello world: 789 + hello world: 790 + hello world: 791 + hello world: 792 + hello world: 793 + hello world: 794 + hello world: 795 + hello world: 796 + hello world: 797 + hello world: 798 + hello 
world: 799 + hello world: 800 + hello world: 801 + hello world: 802 + hello world: 803 + hello world: 804 + hello world: 805 + hello world: 806 + hello world: 807 + hello world: 808 + hello world: 809 + hello world: 810 + hello world: 811 + hello world: 812 + hello world: 813 + hello world: 814 + hello world: 815 + hello world: 816 + hello world: 817 + hello world: 818 + hello world: 819 + hello world: 820 + hello world: 821 + hello world: 822 + hello world: 823 + hello world: 824 + hello world: 825 + hello world: 826 + hello world: 827 + hello world: 828 + hello world: 829 + hello world: 830 + hello world: 831 + hello world: 832 + hello world: 833 + hello world: 834 + hello world: 835 + hello world: 836 + hello world: 837 + hello world: 838 + hello world: 839 + hello world: 840 + hello world: 841 + hello world: 842 + hello world: 843 + hello world: 844 + hello world: 845 + hello world: 846 + hello world: 847 + hello world: 848 + hello world: 849 + hello world: 850 + hello world: 851 + hello world: 852 + hello world: 853 + hello world: 854 + hello world: 855 + hello world: 856 + hello world: 857 + hello world: 858 + hello world: 859 + hello world: 860 + hello world: 861 + hello world: 862 + hello world: 863 + hello world: 864 + hello world: 865 + hello world: 866 + hello world: 867 + hello world: 868 + hello world: 869 + hello world: 870 + hello world: 871 + hello world: 872 + hello world: 873 + hello world: 874 + hello world: 875 + hello world: 876 + hello world: 877 + hello world: 878 + hello world: 879 + hello world: 880 + hello world: 881 + hello world: 882 + hello world: 883 + hello world: 884 + hello world: 885 + hello world: 886 + hello world: 887 + hello world: 888 + hello world: 889 + hello world: 890 + hello world: 891 + hello world: 892 + hello world: 893 + hello world: 894 + hello world: 895 + hello world: 896 + hello world: 897 + hello world: 898 + hello world: 899 + hello world: 900 + hello world: 901 + hello world: 902 + hello world: 903 + hello 
world: 904 + hello world: 905 + hello world: 906 + hello world: 907 + hello world: 908 + hello world: 909 + hello world: 910 + hello world: 911 + hello world: 912 + hello world: 913 + hello world: 914 + hello world: 915 + hello world: 916 + hello world: 917 + hello world: 918 + hello world: 919 + hello world: 920 + hello world: 921 + hello world: 922 + hello world: 923 + hello world: 924 + hello world: 925 + hello world: 926 + hello world: 927 + hello world: 928 + hello world: 929 + hello world: 930 + hello world: 931 + hello world: 932 + hello world: 933 + hello world: 934 + hello world: 935 + hello world: 936 + hello world: 937 + hello world: 938 + hello world: 939 + hello world: 940 + hello world: 941 + hello world: 942 + hello world: 943 + hello world: 944 + hello world: 945 + hello world: 946 + hello world: 947 + hello world: 948 + hello world: 949 + hello world: 950 + hello world: 951 + hello world: 952 + hello world: 953 + hello world: 954 + hello world: 955 + hello world: 956 + hello world: 957 + hello world: 958 + hello world: 959 + hello world: 960 + hello world: 961 + hello world: 962 + hello world: 963 + hello world: 964 + hello world: 965 + hello world: 966 + hello world: 967 + hello world: 968 + hello world: 969 + hello world: 970 + hello world: 971 + hello world: 972 + hello world: 973 + hello world: 974 + hello world: 975 + hello world: 976 + hello world: 977 + hello world: 978 + hello world: 979 + hello world: 980 + hello world: 981 + hello world: 982 + hello world: 983 + hello world: 984 + hello world: 985 + hello world: 986 + hello world: 987 + hello world: 988 + hello world: 989 + hello world: 990 + hello world: 991 + hello world: 992 + hello world: 993 + hello world: 994 + hello world: 995 + hello world: 996 + hello world: 997 + hello world: 998 + hello world: 999 + + + +所以,写文件时候要确保文件被正确关闭。 + + +```python +import os +os.remove('newfile.txt') +``` diff --git a/docs/03-numpy/03.01-numpy-overview.md b/docs/03-numpy/03.01-numpy-overview.md new file 
mode 100644 index 00000000..3bb367b8 --- /dev/null +++ b/docs/03-numpy/03.01-numpy-overview.md @@ -0,0 +1,351 @@ + +# Numpy 简介 + +## 导入numpy + +**Numpy**是**Python**的一个很重要的第三方库,很多其他科学计算的第三方库都是以**Numpy**为基础建立的。 + +**Numpy**的一个重要特性是它的数组计算。 + +在使用**Numpy**之前,我们需要导入`numpy`包: + + +```python +from numpy import * +``` + +使用前一定要先导入 Numpy 包,导入的方法有以下几种: + +```python + import numpy + import numpy as np + from numpy import * + from numpy import array, sin +``` + +事实上,在**ipython**中可以使用magic命令来快速导入**Numpy**的内容。 + + +```python +%pylab +``` + + Using matplotlib backend: Qt4Agg + Populating the interactive namespace from numpy and matplotlib + + +## 数组上的数学操作 + +假如我们想将列表中的每个元素增加`1`,但列表不支持这样的操作(报错): + + +```python +a = [1, 2, 3, 4] +a + 1 +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + 1 a = [1, 2, 3, 4] + ----> 2 a + 1 + + + TypeError: can only concatenate list (not "int") to list + + +转成 `array` : + + +```python +a = array(a) +a +``` + + + + + array([1, 2, 3, 4]) + + + +`array` 数组支持每个元素加 `1` 这样的操作: + + +```python +a + 1 +``` + + + + + array([2, 3, 4, 5]) + + + +与另一个 `array` 相加,得到对应元素相加的结果: + + +```python +b = array([2, 3, 4, 5]) +a + b +``` + + + + + array([3, 5, 7, 9]) + + + +对应元素相乘: + + +```python +a * b +``` + + + + + array([ 2, 6, 12, 20]) + + + +对应元素乘方: + + +```python +a ** b +``` + + + + + array([ 1, 8, 81, 1024]) + + + +## 提取数组中的元素 + +提取第一个元素: + + +```python +a[0] +``` + + + + + 1 + + + +提取前两个元素: + + +```python +a[:2] +``` + + + + + array([1, 2]) + + + +最后两个元素: + + +```python +a[-2:] +``` + + + + + array([3, 4]) + + + +将它们相加: + + +```python +a[:2] + a[-2:] +``` + + + + + array([4, 6]) + + + +## 修改数组形状 + +查看 `array` 的形状: + + +```python +a.shape +``` + + + + + (4L,) + + + +修改 `array` 的形状: + + +```python +a.shape = 2,2 +a +``` + + + + + array([[1, 2], + [3, 4]]) + + + +## 多维数组 + +`a` 现在变成了一个二维的数组,可以进行加法: + + +```python +a + a +``` + + + + + array([[2, 4], + [6, 8]]) + + + 
+乘法仍然是对应元素的乘积,并不是按照矩阵乘法来计算: + + +```python +a * a +``` + + + + + array([[ 1, 4], + [ 9, 16]]) + + + +## 画图 + +linspace 用来生成一组等间隔的数据: + + +```python +a = linspace(0, 2*pi, 21) +%precision 3 +a +``` + + + + + array([ 0. , 0.314, 0.628, 0.942, 1.257, 1.571, 1.885, 2.199, + 2.513, 2.827, 3.142, 3.456, 3.77 , 4.084, 4.398, 4.712, + 5.027, 5.341, 5.655, 5.969, 6.283]) + + + +三角函数: + + +```python +b = sin(a) +b +``` + + + + + array([ 0.000e+00, 3.090e-01, 5.878e-01, 8.090e-01, 9.511e-01, + 1.000e+00, 9.511e-01, 8.090e-01, 5.878e-01, 3.090e-01, + 1.225e-16, -3.090e-01, -5.878e-01, -8.090e-01, -9.511e-01, + -1.000e+00, -9.511e-01, -8.090e-01, -5.878e-01, -3.090e-01, + -2.449e-16]) + + + +画出图像: + + +```python +%matplotlib inline +plot(a, b) +``` + + + + + [] + + + + +![png](output_44_1.png) + + +## 从数组中选择元素 + +假设我们想选取数组b中所有非负的部分,首先可以利用 `b` 产生一组布尔值: + + +```python +b >= 0 +``` + + + + + array([ True, True, True, True, True, True, True, True, True, + True, True, False, False, False, False, False, False, False, + False, False, False], dtype=bool) + + + + +```python +mask = b >= 0 +``` + +画出所有对应的非负值对应的点: + + +```python +plot(a[mask], b[mask], 'ro') +``` + + + + + [] + + + + +![png](output_50_1.png) + diff --git a/docs/03-numpy/03.02-matplotlib-basics.md b/docs/03-numpy/03.02-matplotlib-basics.md new file mode 100644 index 00000000..2011d9c9 --- /dev/null +++ b/docs/03-numpy/03.02-matplotlib-basics.md @@ -0,0 +1,670 @@ + +# Matplotlib 基础 + +在使用**Numpy**之前,需要了解一些画图的基础。 + +**Matplotlib**是一个类似**Matlab**的工具包,主页地址为 + +http://matplotlib.org + +导入 `matplotlib` 和 `numpy`: + + +```python +%pylab +``` + + Using matplotlib backend: Qt4Agg + Populating the interactive namespace from numpy and matplotlib + + +## plot 二维图 + +```python +plot(y) +plot(x, y) +plot(x, y, format_string) +``` + +只给定 `y` 值,默认以下标为 `x` 轴: + + +```python +%matplotlib inline +x = linspace(0, 2 * pi, 50) +plot(sin(x)) +``` + + + + + [] + + + + +![png](output_5_1.png) + + +给定 `x` 和 `y` 值: + + +```python +plot(x, sin(x)) 
+``` + + + + + [] + + + + +![png](output_7_1.png) + + +多条数据线: + + +```python +plot(x, sin(x), + x, sin(2 * x)) +``` + + + + + [, + ] + + + + +![png](output_9_1.png) + + +使用字符串,给定线条参数: + + +```python +plot(x, sin(x), 'r-^') +``` + + + + + [] + + + + +![png](output_11_1.png) + + +多线条: + + +```python +plot(x, sin(x), 'b-o', + x, sin(2 * x), 'r-^') +``` + + + + + [, + ] + + + + +![png](output_13_1.png) + + +更多参数设置,请查阅帮助。事实上,字符串使用的格式与**Matlab**相同。 + +## scatter 散点图 + +```python +scatter(x, y) +scatter(x, y, size) +scatter(x, y, size, color) +``` + +假设我们想画二维散点图: + + +```python +plot(x, sin(x), 'bo') +``` + + + + + [] + + + + +![png](output_17_1.png) + + +可以使用 `scatter` 达到同样的效果: + + +```python +scatter(x, sin(x)) +``` + + + + + + + + + +![png](output_19_1.png) + + +事实上,scatter函数与**Matlab**的用法相同,还可以指定它的大小,颜色等参数: + + +```python +x = rand(200) +y = rand(200) +size = rand(200) * 30 +color = rand(200) +scatter(x, y, size, color) +# 显示颜色条 +colorbar() +``` + + + + + + + + + +![png](output_21_1.png) + + +## 多图 + +使用figure()命令产生新的图像: + + +```python +t = linspace(0, 2*pi, 50) +x = sin(t) +y = cos(t) +figure() +plot(x) +figure() +plot(y) +``` + + + + + [] + + + + +![png](output_24_1.png) + + + +![png](output_24_2.png) + + +或者使用 `subplot` 在一幅图中画多幅子图: + + subplot(row, column, index) + + +```python +subplot(1, 2, 1) +plot(x) +subplot(1, 2, 2) +plot(y) +``` + + + + + [] + + + + +![png](output_26_1.png) + + +## 向图中添加数据 + +默认多次 `plot` 会叠加: + + +```python +plot(x) +plot(y) +``` + + + + + [] + + + + +![png](output_29_1.png) + + +可以跟**Matlab**类似用 hold(False)关掉,这样新图会将原图覆盖: + + +```python +plot(x) +hold(False) +plot(y) +# 恢复原来设定 +hold(True) +``` + + + + + [] + + + + +![png](output_31_1.png) + + +## 标签 + +可以在 `plot` 中加入 `label` ,使用 `legend` 加上图例: + + +```python +plot(x, label='sin') +plot(y, label='cos') +legend() +``` + + + + + + + + + +![png](output_34_1.png) + + +或者直接在 `legend`中加入: + + +```python +plot(x) +plot(y) +legend(['sin', 'cos']) +``` + + + + + + + + + +![png](output_36_1.png) + + 
+## 坐标轴,标题,网格 + +可以设置坐标轴的标签和标题: + + +```python +plot(x, sin(x)) +xlabel('radians') +# 可以设置字体大小 +ylabel('amplitude', fontsize='large') +title('Sin(x)') +``` + + + + + + + + + +![png](output_39_1.png) + + +用 'grid()' 来显示网格: + + +```python +plot(x, sin(x)) +xlabel('radians') +ylabel('amplitude', fontsize='large') +title('Sin(x)') +grid() +``` + + +![png](output_41_0.png) + + +## 清除、关闭图像 + +清除已有的图像使用: + + clf() + +关闭当前图像: + + close() + +关闭所有图像: + + close('all') + +## imshow 显示图片 + +灰度图片可以看成二维数组: + + +```python +# 导入lena图片 +from scipy.misc import lena +img = lena() +img +``` + + + + + array([[162, 162, 162, ..., 170, 155, 128], + [162, 162, 162, ..., 170, 155, 128], + [162, 162, 162, ..., 170, 155, 128], + ..., + [ 43, 43, 50, ..., 104, 100, 98], + [ 44, 44, 55, ..., 104, 105, 108], + [ 44, 44, 55, ..., 104, 105, 108]]) + + + +我们可以用 `imshow()` 来显示图片数据: + + +```python +imshow(img, + # 设置坐标范围 + extent = [-25, 25, -25, 25], + # 设置colormap + cmap = cm.bone) +colorbar() +``` + + + + + + + + + +![png](output_48_1.png) + + +更多参数和用法可以参阅帮助。 + +这里 `cm` 表示 `colormap`,可以看它的种类: + + +```python +dir(cm) +``` + + + + + [u'Accent', + u'Accent_r', + u'Blues', + u'Blues_r', + u'BrBG', + u'BrBG_r', + u'BuGn', + u'BuGn_r', + u'BuPu', + u'BuPu_r', + u'CMRmap', + u'CMRmap_r', + u'Dark2', + u'Dark2_r', + u'GnBu', + u'GnBu_r', + u'Greens', + u'Greens_r', + u'Greys', + u'Greys_r', + 'LUTSIZE', + u'OrRd', + u'OrRd_r', + u'Oranges', + u'Oranges_r', + u'PRGn', + u'PRGn_r', + u'Paired', + u'Paired_r', + u'Pastel1', + u'Pastel1_r', + u'Pastel2', + u'Pastel2_r', + u'PiYG', + u'PiYG_r', + u'PuBu', + u'PuBuGn', + u'PuBuGn_r', + u'PuBu_r', + u'PuOr', + u'PuOr_r', + u'PuRd', + u'PuRd_r', + u'Purples', + u'Purples_r', + u'RdBu', + u'RdBu_r', + u'RdGy', + u'RdGy_r', + u'RdPu', + u'RdPu_r', + u'RdYlBu', + u'RdYlBu_r', + u'RdYlGn', + u'RdYlGn_r', + u'Reds', + u'Reds_r', + 'ScalarMappable', + u'Set1', + u'Set1_r', + u'Set2', + u'Set2_r', + u'Set3', + u'Set3_r', + u'Spectral', + u'Spectral_r', + u'Wistia', + 
u'Wistia_r', + u'YlGn', + u'YlGnBu', + u'YlGnBu_r', + u'YlGn_r', + u'YlOrBr', + u'YlOrBr_r', + u'YlOrRd', + u'YlOrRd_r', + '__builtins__', + '__doc__', + '__file__', + '__name__', + '__package__', + '_generate_cmap', + '_reverse_cmap_spec', + '_reverser', + 'absolute_import', + u'afmhot', + u'afmhot_r', + u'autumn', + u'autumn_r', + u'binary', + u'binary_r', + u'bone', + u'bone_r', + u'brg', + u'brg_r', + u'bwr', + u'bwr_r', + 'cbook', + 'cmap_d', + 'cmapname', + 'colors', + u'cool', + u'cool_r', + u'coolwarm', + u'coolwarm_r', + u'copper', + u'copper_r', + 'cubehelix', + u'cubehelix_r', + 'datad', + 'division', + u'flag', + u'flag_r', + 'get_cmap', + u'gist_earth', + u'gist_earth_r', + u'gist_gray', + u'gist_gray_r', + u'gist_heat', + u'gist_heat_r', + u'gist_ncar', + u'gist_ncar_r', + u'gist_rainbow', + u'gist_rainbow_r', + u'gist_stern', + u'gist_stern_r', + u'gist_yarg', + u'gist_yarg_r', + u'gnuplot', + u'gnuplot2', + u'gnuplot2_r', + u'gnuplot_r', + u'gray', + u'gray_r', + u'hot', + u'hot_r', + u'hsv', + u'hsv_r', + u'jet', + u'jet_r', + 'ma', + 'mpl', + u'nipy_spectral', + u'nipy_spectral_r', + 'np', + u'ocean', + u'ocean_r', + 'os', + u'pink', + u'pink_r', + 'print_function', + u'prism', + u'prism_r', + u'rainbow', + u'rainbow_r', + 'register_cmap', + 'revcmap', + u'seismic', + u'seismic_r', + 'six', + 'spec', + 'spec_reversed', + u'spectral', + u'spectral_r', + u'spring', + u'spring_r', + u'summer', + u'summer_r', + u'terrain', + u'terrain_r', + 'unicode_literals', + u'winter', + u'winter_r'] + + + +使用不同的 `colormap` 会有不同的显示效果。 + + +```python +imshow(img, cmap=cm.RdGy_r) +``` + + + + + + + + + +![png](output_53_1.png) + + +## 从脚本中运行 + +在脚本中使用 `plot` 时,通常图像是不会直接显示的,需要增加 `show()` 选项,只有在遇到 `show()` 命令之后,图像才会显示。 + +## 直方图 + +从高斯分布随机生成1000个点得到的直方图: + + +```python +hist(randn(1000)) +``` + + + + + (array([ 2., 7., 37., 119., 216., 270., 223., 82., 31., 13.]), + array([-3.65594649, -2.98847032, -2.32099415, -1.65351798, -0.98604181, + -0.31856564, 0.34891053, 
1.0163867 , 1.68386287, 2.35133904, + 3.01881521]), + ) + + + + +![png](output_58_1.png) + + +更多例子请参考下列网站: + +http://matplotlib.org/gallery.html diff --git a/docs/03-numpy/03.03-numpy-arrays.md b/docs/03-numpy/03.03-numpy-arrays.md new file mode 100644 index 00000000..03e39b48 --- /dev/null +++ b/docs/03-numpy/03.03-numpy-arrays.md @@ -0,0 +1,1097 @@ + +# Numpy 数组及其索引 + +先导入numpy: + + +```python +from numpy import * +``` + +## 产生数组 + +从列表产生数组: + + +```python +lst = [0, 1, 2, 3] +a = array(lst) +a +``` + + + + + array([0, 1, 2, 3]) + + + +或者直接将列表传入: + + +```python +a = array([1, 2, 3, 4]) +a +``` + + + + + array([1, 2, 3, 4]) + + + +## 数组属性 + +查看类型: + + +```python +type(a) +``` + + + + + numpy.ndarray + + + +查看数组中的数据类型: + + +```python +# 32比特的整数 +a.dtype +``` + + + + + dtype('int32') + + + +查看每个元素所占的字节: + + +```python +a.itemsize +``` + + + + + 4 + + + +查看形状,会返回一个元组,每个元素代表这一维的元素数目: + + +```python +# 1维数组,返回一个元组 +a.shape +``` + + + + + (4L,) + + + +或者使用: + + +```python +shape(a) +``` + + + + + (4L,) + + + +`shape` 的使用历史要比 `a.shape` 久,而且还可以作用于别的类型: + + +```python +lst = [1,2,3,4] +shape(lst) +``` + + + + + (4L,) + + + +查看元素数目: + + +```python +a.size +``` + + + + + 4 + + + + +```python +size(a) +``` + + + + + 4 + + + +查看所有元素所占的空间: + + +```python +a.nbytes +``` + + + + + 16 + + + +但事实上,数组所占的存储空间要比这个数字大,因为要用一个header来保存shape,dtype这样的信息。 + +查看数组维数: + + +```python +a.ndim +``` + + + + + 1 + + + +## 使用fill方法设定初始值 + +可以使用 `fill` 方法将数组设为指定值: + + +```python +a.fill(-4.8) +a +``` + + + + + array([-4, -4, -4, -4]) + + + +但是与列表不同,数组中要求所有元素的 `dtype` 是一样的,如果传入参数的类型与数组类型不一样,需要按照已有的类型进行转换。 + +## 索引与切片 + +和列表相似,数组也支持索引和切片操作。 + +索引第一个元素: + + +```python +a = array([0, 1, 2, 3]) +a[0] +``` + + + + + 0 + + + +修改第一个元素的值: + + +```python +a[0] = 10 +a +``` + + + + + array([10, 1, 2, 3]) + + + +切片,支持负索引: + + +```python +a = array([11,12,13,14,15]) +a[1:3] +``` + + + + + array([12, 13]) + + + + +```python +a[1:-2] +``` + + + + + array([12, 13]) + + + + +```python +a[-4:3] +``` + + + + + 
array([12, 13]) + + + +省略参数: + + +```python +a[::2] +``` + + + + + array([11, 13, 15]) + + + + +```python +a[-2:] +``` + + + + + array([14, 15]) + + + +假设我们记录一辆汽车表盘上每天显示的里程数: + + +```python +od = array([21000, 21180, 21240, 22100, 22400]) +``` + +可以这样计算每天的旅程: + + +```python +dist = od[1:] - od[:-1] +dist +``` + + + + + array([180, 60, 860, 300]) + + + +在本质上,**Python**会将array的各种计算转换为类似这样的**C**代码: + +```c +int compute_sum(int *arr, int N) { + int sum = 0; + int i; + for (i = 0; i < N; i++) { + sum += arr[i]; + } + return sum; +} +``` + +## 多维数组及其属性 + +`array` 还可以用来生成多维数组: + + +```python +a = array([[ 0, 1, 2, 3], + [10,11,12,13]]) +a +``` + + + + + array([[ 0, 1, 2, 3], + [10, 11, 12, 13]]) + + + +事实上我们传入的是一个以列表为元素的列表,最终得到一个二维数组。 + +甚至可以扩展到3D或者4D的情景。 + +查看形状: + + +```python +a.shape +``` + + + + + (2L, 4L) + + + +这里2代表行数,4代表列数。 + +查看总的元素个数: + + +```python +# 2 * 4 = 8 +a.size +``` + + + + + 8 + + + +查看维数: + + +```python +a.ndim +``` + + + + + 2 + + + +## 多维数组索引 + +对于二维数组,可以传入两个数字来索引: + + +```python +a[1, 3] +``` + + + + + 13 + + + +其中,1是行索引,3是列索引,中间用逗号隔开,事实上,**Python**会将它们看成一个元组(1,3),然后按照顺序进行对应。 + +可以利用索引给它赋值: + + +```python +a[1, 3] = -1 +a +``` + + + + + array([[ 0, 1, 2, 3], + [10, 11, 12, -1]]) + + + +事实上,我们还可以使用单个索引来索引一整行内容: + + +```python +# 返回第二行元素组成的array +a[1] +``` + + + + + array([10, 11, 12, -1]) + + + +**Python**会将这单个索引当成对第一维的索引,然后返回对应的内容。 + +## 多维数组切片 + +多维数组,也支持切片操作: + + +```python +a = array([[ 0, 1, 2, 3, 4, 5], + [10,11,12,13,14,15], + [20,21,22,23,24,25], + [30,31,32,33,34,35], + [40,41,42,43,44,45], + [50,51,52,53,54,55]]) +a +``` + + + + + array([[ 0, 1, 2, 3, 4, 5], + [10, 11, 12, 13, 14, 15], + [20, 21, 22, 23, 24, 25], + [30, 31, 32, 33, 34, 35], + [40, 41, 42, 43, 44, 45], + [50, 51, 52, 53, 54, 55]]) + + + +想得到第一行的第 4 和第 5 两个元素: + + +```python +a[0, 3:5] +``` + + + + + array([3, 4]) + + + +得到最后两行的最后两列: + + +```python +a[4:, 4:] +``` + + + + + array([[44, 45], + [54, 55]]) + + + +得到第三列: + + +```python +a[:, 2] +``` + + + + + array([ 2, 12, 22, 
32, 42, 52]) + + + +每一维都支持切片的规则,包括负索引,省略: + + [lower:upper:step] + +例如,取出3,5行的奇数列: + + +```python +a[2::2, ::2] +``` + + + + + array([[20, 22, 24], + [40, 42, 44]]) + + + +## 切片是引用 + +切片在内存中使用的是引用机制。 + + +```python +a = array([0,1,2,3,4]) +b = a[2:4] +print b +``` + + [2 3] + + +引用机制意味着,**Python**并没有为 `b` 分配新的空间来存储它的值,而是让 `b` 指向了 `a` 所分配的内存空间,因此,改变 `b` 会改变 `a` 的值: + + +```python +b[0] = 10 +a +``` + + + + + array([ 0, 1, 10, 3, 4]) + + + +而这种现象在列表中并不会出现: + + +```python +a = [1,2,3,4,5] +b = a[2:3] +b[0] = 13234 +print a +``` + + [1, 2, 3, 4, 5] + + +这样做的好处在于,对于很大的数组,不用大量复制多余的值,节约了空间。 + +缺点在于,可能出现改变一个值改变另一个值的情况。 + +一个解决方法是使用copy()方法产生一个复制,这个复制会申请新的内存: + + +```python +a = array([0,1,2,3,4]) +b = a[2:4].copy() +b[0] = 10 +a +``` + + + + + array([0, 1, 2, 3, 4]) + + + +## 花式索引 + +切片只能支持连续或者等间隔的切片操作,要想实现任意位置的操作,需要使用花式索引 `fancy slicing` 。 + +### 一维花式索引 + +与 range 函数类似,我们可以使用 arange 函数来产生等差数组。 + + +```python +a = arange(0, 80, 10) +a +``` + + + + + array([ 0, 10, 20, 30, 40, 50, 60, 70]) + + + +花式索引需要指定索引位置: + + +```python +indices = [1, 2, -3] +y = a[indices] +print y +``` + + [10 20 50] + + +还可以使用布尔数组来花式索引: + + +```python +mask = array([0,1,1,0,0,1,0,0], + dtype=bool) +``` + + +```python +a[mask] +``` + + + + + array([10, 20, 50]) + + + +或者用布尔表达式生成 `mask`,选出了所有大于0.5的值: + + +```python +from numpy.random import rand +a = rand(10) +a +``` + + + + + array([ 0.37214708, 0.48594733, 0.73365131, 0.15769295, 0.30786017, + 0.62068734, 0.36940654, 0.09424167, 0.53085308, 0.12248951]) + + + + +```python +mask = a > 0.5 +a[mask] +``` + + + + + array([ 0.73365131, 0.62068734, 0.53085308]) + + + +mask 必须是布尔数组。 + +### 二维花式索引 + + +```python +a = array([[ 0, 1, 2, 3, 4, 5], + [10,11,12,13,14,15], + [20,21,22,23,24,25], + [30,31,32,33,34,35], + [40,41,42,43,44,45], + [50,51,52,53,54,55]]) +a +``` + + + + + array([[ 0, 1, 2, 3, 4, 5], + [10, 11, 12, 13, 14, 15], + [20, 21, 22, 23, 24, 25], + [30, 31, 32, 33, 34, 35], + [40, 41, 42, 43, 44, 45], + [50, 51, 52, 53, 54, 55]]) + + + 
+对于二维花式索引,我们需要给定 `row` 和 `col` 的值: + + +```python +a[(0,1,2,3,4), (1,2,3,4,5)] +``` + + + + + array([ 1, 12, 23, 34, 45]) + + + +返回的是一条次对角线上的5个值。 + + +```python +a[3:, [0,2,5]] +``` + + + + + array([[30, 32, 35], + [40, 42, 45], + [50, 52, 55]]) + + + +返回的是最后三行的第1,3,6列。 + +也可以使用mask进行索引: + + +```python +mask = array([1,0,1,0,0,1], + dtype=bool) +a[mask, 2] +``` + + + + + array([ 2, 22, 52]) + + + +与切片不同,花式索引返回的是原对象的一个复制而不是引用。 + +### “不完全”索引 + +只给定行索引的时候,返回整行: + + +```python +y = a[:3] +y +``` + + + + + array([[ 0, 1, 2, 3, 4, 5], + [10, 11, 12, 13, 14, 15], + [20, 21, 22, 23, 24, 25]]) + + + +这时候也可以使用花式索引取出第2,3,5行: + + +```python +condition = array([0,1,1,0,1], + dtype=bool) +a[condition] +``` + + + + + array([[10, 11, 12, 13, 14, 15], + [20, 21, 22, 23, 24, 25], + [40, 41, 42, 43, 44, 45]]) + + + +### 三维花式索引 + + +```python +a = arange(64) +a.shape = 4,4,4 +a +``` + + + + + array([[[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11], + [12, 13, 14, 15]], + + [[16, 17, 18, 19], + [20, 21, 22, 23], + [24, 25, 26, 27], + [28, 29, 30, 31]], + + [[32, 33, 34, 35], + [36, 37, 38, 39], + [40, 41, 42, 43], + [44, 45, 46, 47]], + + [[48, 49, 50, 51], + [52, 53, 54, 55], + [56, 57, 58, 59], + [60, 61, 62, 63]]]) + + + + +```python +y = a[:,:,[2, -1]] +y +``` + + + + + array([[[ 2, 3], + [ 6, 7], + [10, 11], + [14, 15]], + + [[18, 19], + [22, 23], + [26, 27], + [30, 31]], + + [[34, 35], + [38, 39], + [42, 43], + [46, 47]], + + [[50, 51], + [54, 55], + [58, 59], + [62, 63]]]) + + + +## where语句 + + where(array) + +`where` 函数会返回所有非零元素的索引。 + +### 一维数组 + +先看一维的例子: + + +```python +a = array([0, 12, 5, 20]) +``` + +判断数组中的元素是不是大于10: + + +```python +a > 10 +``` + + + + + array([False, True, False, True], dtype=bool) + + + +数组中所有大于10的元素的索引位置: + + +```python +where(a > 10) +``` + + + + + (array([1, 3], dtype=int64),) + + + +注意到 `where` 的返回值是一个元组。 + +使用元组是由于 where 可以对多维数组使用,此时返回值就是多维的。 + +在使用的时候,我们可以这样: + + +```python +indices = where(a > 10) +indices = indices[0] +indices +``` + + + + + 
array([1, 3], dtype=int64) + + + +或者: + + +```python +indices = where(a>10)[0] +indices +``` + + + + + array([1, 3], dtype=int64) + + + +可以直接用 `where` 的返回值进行索引: + + +```python +loc = where(a > 10) +a[loc] +``` + + + + + array([12, 20]) + + + +### 多维数组 + +考虑二维数组: + + +```python +a = array([[0, 12, 5, 20], + [1, 2, 11, 15]]) +loc = where(a > 10) +``` + +返回结果是一个二维的元组,每一维代表这一维的索引值: + + +```python +loc +``` + + + + + (array([0, 0, 1, 1], dtype=int64), array([1, 3, 2, 3], dtype=int64)) + + + +也可以直接用来索引a: + + +```python +a[loc] +``` + + + + + array([12, 20, 11, 15]) + + + +或者可以这样: + + +```python +rows, cols = where(a>10) +``` + + +```python +rows +``` + + + + + array([0, 0, 1, 1], dtype=int64) + + + + +```python +cols +``` + + + + + array([1, 3, 2, 3], dtype=int64) + + + + +```python +a[rows, cols] +``` + + + + + array([12, 20, 11, 15]) + + + +再看另一个例子: + + +```python +a = arange(25) +a.shape = 5,5 +a +``` + + + + + array([[ 0, 1, 2, 3, 4], + [ 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19], + [20, 21, 22, 23, 24]]) + + + + +```python +a > 12 +``` + + + + + array([[False, False, False, False, False], + [False, False, False, False, False], + [False, False, False, True, True], + [ True, True, True, True, True], + [ True, True, True, True, True]], dtype=bool) + + + + +```python +where(a > 12) +``` + + + + + (array([2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4], dtype=int64), + array([3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=int64)) + + diff --git a/docs/03-numpy/03.04-array-types.md b/docs/03-numpy/03.04-array-types.md new file mode 100644 index 00000000..1f9c9c10 --- /dev/null +++ b/docs/03-numpy/03.04-array-types.md @@ -0,0 +1,574 @@ + +# 数组类型 + + +```python +from numpy import * +``` + +之前已经看过整数数组和布尔数组,除此之外还有浮点数数组和复数数组。 + +## 复数数组 + +产生一个复数数组: + + +```python +a = array([1 + 1j, 2, 3, 4]) +``` + +**Python**会自动判断数组的类型: + + +```python +a.dtype +``` + + + + + dtype('complex128') + + + +对于复数我们可以查看它的实部和虚部: + + +```python +a.real +``` + + + + + array([ 1., 2., 3., 4.]) + + 
+ + +```python +a.imag +``` + + + + + array([ 1., 0., 0., 0.]) + + + +还可以设置它们的值: + + +```python +a.imag = [1,2,3,4] +``` + +查看 `a`: + + +```python +a +``` + + + + + array([ 1.+1.j, 2.+2.j, 3.+3.j, 4.+4.j]) + + + +查看复共轭: + + +```python +a.conj() +``` + + + + + array([ 1.-1.j, 2.-2.j, 3.-3.j, 4.-4.j]) + + + +事实上,这些属性方法可以用在浮点数或者整数数组上: + + +```python +a = array([0.,1,2,3]) +a.dtype +``` + + + + + dtype('float64') + + + + +```python +a.real +``` + + + + + array([ 0., 1., 2., 3.]) + + + + +```python +a.imag +``` + + + + + array([ 0., 0., 0., 0.]) + + + + +```python +a.conj() +``` + + + + + array([ 0., 1., 2., 3.]) + + + +但这里,虚部是只读的,并不能修改它的值: + + +```python +# 会报错 +a.imag = [1,2,3,4] +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + 1 # 会报错 + ----> 2 a.imag = [1,2,3,4] + + + TypeError: array does not have imaginary part to set + + +## 指定数组类型 + +之前已经知道,构建数组的时候,数组会根据传入的内容自动判断类型: + + +```python +a = array([0,1.0,2,3]) +``` + +对于浮点数,默认为双精度: + + +```python +a.dtype +``` + + + + + dtype('float64') + + + +查看所用字节(`8 bytes * 4`): + + +```python +a.nbytes +``` + + + + + 32 + + + +当然,我们也可以在构建的时候指定类型: + + +```python +a = array([0,1.0,2,3], + dtype=float32) +``` + +此时类型为单精度浮点数: + + +```python +a.dtype +``` + + + + + dtype('float32') + + + +查看所用字节(`4 bytes * 4`): + + +```python +a.nbytes +``` + + + + + 16 + + + +除此之外,还可以指定有无符号,例如无符号整数: + + +```python +a = array([0,1,2,3], + dtype=uint8) +a.dtype +``` + + + + + dtype('uint8') + + + +`uint8` 只使用一个字节,表示 `0` 到 `255` 的整数。 + +还可以从二进制数据中读取。 + +先写入二进制数据: + + +```python +a = array([102,111,212], + dtype=uint8) +a.tofile('foo.dat') +``` + +从数据中读入,要指定类型: + + +```python +b = frombuffer('foo', + dtype=uint8) +b +``` + + + + + array([102, 111, 111], dtype=uint8) + + + +清理数据文件: + + +```python +import os +os.remove('foo.dat') +``` + +`0-255` 的数字可以表示ASCⅡ码,我们可以用 ord 函数来查看字符的ASCⅡ码值: + + +```python +ord('f') +``` + + + + + 102 + + + + +```python +ord('S') +``` 
+ + + + + 83 + + + +## Numpy 类型 + +具体如下: + +|基本类型|可用的**Numpy**类型|备注 +|--|--|-- +|布尔型|`bool`|占1个字节 +|整型|`int8, int16, int32, int64, int128, int`| `int` 跟**C**语言中的 `long` 一样大 +|无符号整型|`uint8, uint16, uint32, uint64, uint128, uint`| `uint` 跟**C**语言中的 `unsigned long` 一样大 +|浮点数| `float16, float32, float64, float, longfloat`|默认为双精度 `float64` ,`longfloat` 精度大小与系统有关 +|复数| `complex64, complex128, complex, longcomplex`| 默认为 `complex128` ,即实部虚部都为双精度 +|字符串| `string, unicode` | 可以使用 `dtype=S4` 表示一个4字节字符串的数组 +|对象| `object` |数组中可以使用任意值| +|Records| `void` || +|时间| `datetime64, timedelta64` || + +任意类型的数组: + + +```python +a = array([1,1.2,'hello', [10,20,30]], + dtype=object) +``` + +乘法: + + +```python +a * 2 +``` + + + + + array([2, 2.4, 'hellohello', [10, 20, 30, 10, 20, 30]], dtype=object) + + + +## 类型转换 + +转换数组的类型: + + +```python +a = array([1.5, -3], + dtype=float32) +a +``` + + + + + array([ 1.5, -3. ], dtype=float32) + + + +### asarray 函数 + +使用 `asarray` 函数: + + +```python +asarray(a, dtype=float64) +``` + + + + + array([ 1.5, -3. ]) + + + + +```python +asarray(a, dtype=uint8) +``` + + + + + array([ 1, 253], dtype=uint8) + + + +`asarray` 不会修改原来数组的值: + + +```python +a +``` + + + + + array([ 1.5, -3. ], dtype=float32) + + + +但当类型相同的时候,`asarray` 并不会产生新的对象,而是使用同一个引用: + + +```python +b = asarray(a, dtype=float32) +``` + + +```python +b is a +``` + + + + + True + + + +这么做的好处在于,`asarray` 不仅可以作用于数组,还可以将其他类型转化为数组。 + +有些时候为了保证我们的输入值是数组,我们需要将其使用 `asarray` 转化,当它已经是数组的时候,并不会产生新的对象,这样保证了效率。 + + +```python +asarray([1,2,3,4]) +``` + + + + + array([1, 2, 3, 4]) + + + +### astype 方法 + +`astype` 方法返回一个新数组: + + +```python +a.astype(float64) +``` + + + + + array([ 1.5, -3. ]) + + + + +```python +a.astype(uint8) +``` + + + + + array([ 1, 253], dtype=uint8) + + + +astype也不会改变原来数组的值: + + +```python +a +``` + + + + + array([ 1.5, -3. ], dtype=float32) + + + +另外,`astype` 总是返回原来数组的一份复制,即使转换的类型是相同的: + + +```python +b = a.astype(float32) +print a +print b +``` + + [ 1.5 -3. ] + [ 1.5 -3. 
] + + + +```python +a is b +``` + + + + + False + + + +### view 方法 + + +```python +a = array((1,2,3,4), dtype=int32) +a +``` + + + + + array([1, 2, 3, 4]) + + + +`view` 会将 `a` 在内存中的表示看成是 `uint8` 进行解析: + + +```python +b = a.view(uint8) +b +``` + + + + + array([1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0], dtype=uint8) + + + + +```python +a[0] = 2**30 +a +``` + + + + + array([1073741824, 2, 3, 4]) + + + +修改 `a` 会修改 `b` 的值,因为共用一块内存: + + +```python +b +``` + + + + + array([ 0, 0, 0, 64, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0], dtype=uint8) + + diff --git a/docs/03-numpy/03.05-array-calculation-method.md b/docs/03-numpy/03.05-array-calculation-method.md new file mode 100644 index 00000000..3c8f1231 --- /dev/null +++ b/docs/03-numpy/03.05-array-calculation-method.md @@ -0,0 +1,478 @@ + +# 数组方法 + + +```python +%pylab +``` + + Using matplotlib backend: Qt4Agg + Populating the interactive namespace from numpy and matplotlib + + +## 求和 + + +```python +a = array([[1,2,3], + [4,5,6]]) +``` + +求所有元素的和: + + +```python +sum(a) +``` + + + + + 21 + + + +指定求和的维度: + +沿着第一维求和: + + +```python +sum(a, axis=0) +``` + + + + + array([5, 7, 9]) + + + +沿着第二维求和: + + +```python +sum(a, axis=1) +``` + + + + + array([ 6, 15]) + + + +沿着最后一维求和: + + +```python +sum(a, axis=-1) +``` + + + + + array([ 6, 15]) + + + +或者使用 `sum` 方法: + + +```python +a.sum() +``` + + + + + 21 + + + + +```python +a.sum(axis=0) +``` + + + + + array([5, 7, 9]) + + + + +```python +a.sum(axis=-1) +``` + + + + + array([ 6, 15]) + + + +## 求积 + +求所有元素的乘积: + + +```python +a.prod() +``` + + + + + 720 + + + +或者使用函数形式: + + +```python +prod(a, axis=0) +``` + + + + + array([ 4, 10, 18]) + + + +## 求最大最小值 + + +```python +from numpy.random import rand +a = rand(3, 4) +%precision 3 +a +``` + + + + + array([[ 0.444, 0.06 , 0.668, 0.02 ], + [ 0.793, 0.302, 0.81 , 0.381], + [ 0.296, 0.182, 0.345, 0.686]]) + + + +全局最小: + + +```python +a.min() +``` + + + + + 0.020 + + + +沿着某个轴的最小: + + +```python +a.min(axis=0) +``` + + + + + array([ 0.296, 0.06 
, 0.345, 0.02 ]) + + + +全局最大: + + +```python +a.max() +``` + + + + + 0.810 + + + +沿着某个轴的最大: + + +```python +a.max(axis=-1) +``` + + + + + array([ 0.668, 0.81 , 0.686]) + + + +## 最大最小值的位置 + +使用 `argmin, argmax` 方法: + + +```python +a.argmin() +``` + + + + + 3 + + + + +```python +a.argmin(axis=0) +``` + + + + + array([2, 0, 2, 0], dtype=int64) + + + +## 均值 + +可以使用 `mean` 方法: + + +```python +a = array([[1,2,3],[4,5,6]]) +``` + + +```python +a.mean() +``` + + + + + 3.500 + + + + +```python +a.mean(axis=-1) +``` + + + + + array([ 2., 5.]) + + + +也可以使用 `mean` 函数: + + +```python +mean(a) +``` + + + + + 3.500 + + + +还可以使用 `average` 函数: + + +```python +average(a, axis = 0) +``` + + + + + array([ 2.5, 3.5, 4.5]) + + + +`average` 函数还支持加权平均: + + +```python +average(a, axis = 0, weights=[1,2]) +``` + + + + + array([ 3., 4., 5.]) + + + +## 标准差 + +用 `std` 方法计算标准差: + + +```python +a.std(axis=1) +``` + + + + + array([ 0.816, 0.816]) + + + +用 `var` 方法计算方差: + + +```python +a.var(axis=1) +``` + + + + + array([ 0.667, 0.667]) + + + +或者使用函数: + + +```python +var(a, axis=1) +``` + + + + + array([ 0.667, 0.667]) + + + + +```python +std(a, axis=1) +``` + + + + + array([ 0.816, 0.816]) + + + +## clip 方法 + +将数值限制在某个范围: + + +```python +a +``` + + + + + array([[1, 2, 3], + [4, 5, 6]]) + + + + +```python +a.clip(3,5) +``` + + + + + array([[3, 3, 3], + [4, 5, 5]]) + + + +小于3的变成3,大于5的变成5。 + +## ptp 方法 + +计算最大值和最小值之差: + + +```python +a.ptp(axis=1) +``` + + + + + array([2, 2]) + + + + +```python +a.ptp() +``` + + + + + 5 + + + +## round 方法 + +近似,默认到整数: + + +```python +a = array([1.35, 2.5, 1.5]) +``` + +这里,.5的近似规则为近似到偶数值,可以参考: + +https://en.wikipedia.org/wiki/Rounding#Round_half_to_even + + +```python +a.round() +``` + + + + + array([ 1., 2., 2.]) + + + +近似到一位小数: + + +```python +a.round(decimals=1) +``` + + + + + array([ 1.4, 2.5, 1.5]) + + diff --git a/docs/03-numpy/03.06-sorting-numpy-arrays.md b/docs/03-numpy/03.06-sorting-numpy-arrays.md new file mode 100644 index 00000000..ba5cb485 --- /dev/null 
+++ b/docs/03-numpy/03.06-sorting-numpy-arrays.md @@ -0,0 +1,281 @@ + +# 数组排序 + + +```python +%pylab +``` + + Using matplotlib backend: Qt4Agg + Populating the interactive namespace from numpy and matplotlib + + +## sort 函数 + +先看这个例子: + + +```python +names = array(['bob', 'sue', 'jan', 'ad']) +weights = array([20.8, 93.2, 53.4, 61.8]) + +sort(weights) +``` + + + + + array([ 20.8, 53.4, 61.8, 93.2]) + + + +`sort` 返回的结果是从小到大排列的。 + +## argsort 函数 + +`argsort` 返回从小到大的排列在数组中的索引位置: + + +```python +ordered_indices = argsort(weights) +ordered_indices +``` + + + + + array([0, 2, 3, 1], dtype=int64) + + + +可以用它来进行索引: + + +```python +weights[ordered_indices] +``` + + + + + array([ 20.8, 53.4, 61.8, 93.2]) + + + + +```python +names[ordered_indices] +``` + + + + + array(['bob', 'jan', 'ad', 'sue'], + dtype='|S3') + + + +使用函数并不会改变原来数组的值: + + +```python +weights +``` + + + + + array([ 20.8, 93.2, 53.4, 61.8]) + + + +## sort 和 argsort 方法 + +数组也支持方法操作: + + +```python +data = array([20.8, 93.2, 53.4, 61.8]) +data.argsort() +``` + + + + + array([0, 2, 3, 1], dtype=int64) + + + +`argsort` 方法与 `argsort` 函数的使用没什么区别,也不会改变数组的值。 + + +```python +data +``` + + + + + array([ 20.8, 93.2, 53.4, 61.8]) + + + +但是 `sort`方法会改变数组的值: + + +```python +data.sort() +``` + + +```python +data +``` + + + + + array([ 20.8, 53.4, 61.8, 93.2]) + + + +## 二维数组排序 + +对于多维数组,sort方法默认沿着最后一维开始排序: + + +```python +a = array([ + [.2, .1, .5], + [.4, .8, .3], + [.9, .6, .7] + ]) +a +``` + + + + + array([[ 0.2, 0.1, 0.5], + [ 0.4, 0.8, 0.3], + [ 0.9, 0.6, 0.7]]) + + + +对于二维数组,默认相当于对每一行进行排序: + + +```python +sort(a) +``` + + + + + array([[ 0.1, 0.2, 0.5], + [ 0.3, 0.4, 0.8], + [ 0.6, 0.7, 0.9]]) + + + +改变轴,对每一列进行排序: + + +```python +sort(a, axis = 0) +``` + + + + + array([[ 0.2, 0.1, 0.3], + [ 0.4, 0.6, 0.5], + [ 0.9, 0.8, 0.7]]) + + + +## searchsorted 函数 + + searchsorted(sorted_array, values) + +`searchsorted` 接受两个参数,其中,第一个必需是已排序的数组。 + + +```python +sorted_array = linspace(0,1,5) +values = array([.1,.8,.3,.12,.5,.25]) +``` 
+ + +```python +searchsorted(sorted_array, values) +``` + + + + + array([1, 4, 2, 1, 2, 1], dtype=int64) + + + +排序数组: + +|0|1|2|3|4| +|-|-|-|-|-| +|0.0|0.25|0.5|0.75|1.0 + +数值: + +|值|0.1|0.8|0.3|0.12|0.5|0.25| +|-|-|-|-|-|-|-| +|插入位置|1|4|2|1|2|1| + +`searchsorted` 返回的值相当于保持第一个数组的排序性质不变,将第二个数组中的值插入第一个数组中的位置: + +例如 `0.1` 在 [0.0, 0.25) 之间,所以插入时应当放在第一个数组的索引 `1` 处,故第一个返回值为 `1`。 + + +```python +from numpy.random import rand +data = rand(100) +data.sort() +``` + +不加括号,默认是元组: + + +```python +bounds = .4, .6 +bounds +``` + + + + + (0.4, 0.6) + + + +返回这两个值对应的插入位置: + + +```python +low_idx, high_idx = searchsorted(data, bounds) +``` + +利用插入位置,将数组中所有在这两个值之间的值提取出来: + + +```python +data[low_idx:high_idx] +``` + + + + + array([ 0.41122674, 0.4395727 , 0.45609773, 0.45707137, 0.45772076, + 0.46029997, 0.46757401, 0.47525517, 0.4969198 , 0.53068779, + 0.55764166, 0.56288568, 0.56506548, 0.57003042, 0.58035233, + 0.59279233, 0.59548555]) + + diff --git a/docs/03-numpy/03.07-array-shapes.md b/docs/03-numpy/03.07-array-shapes.md new file mode 100644 index 00000000..0dcc8625 --- /dev/null +++ b/docs/03-numpy/03.07-array-shapes.md @@ -0,0 +1,721 @@ + +# 数组形状 + + +```python +%pylab +``` + + Using matplotlib backend: Qt4Agg + Populating the interactive namespace from numpy and matplotlib + + +## 修改数组的形状 + + +```python +a = arange(6) +a +``` + + + + + array([0, 1, 2, 3, 4, 5]) + + + +将形状修改为2乘3: + + +```python +a.shape = 2,3 +a +``` + + + + + array([[0, 1, 2], + [3, 4, 5]]) + + + +与之对应的方法是 `reshape` ,但它不会修改原来数组的值,而是返回一个新的数组: + + +```python +a.reshape(3,2) +``` + + + + + array([[0, 1], + [2, 3], + [4, 5]]) + + + + +```python +a +``` + + + + + array([[0, 1, 2], + [3, 4, 5]]) + + + +`shape` 和 `reshape` 方法不能改变数组中元素的总数,否则会报错: + + +```python +a.reshape(4,2) +``` + + + --------------------------------------------------------------------------- + + ValueError Traceback (most recent call last) + + in () + ----> 1 a.reshape(4,2) + + + ValueError: total size of new array must be unchanged + + +## 使用 
newaxis 增加数组维数 + + +```python +a = arange(3) +shape(a) +``` + + + + + (3L,) + + + + +```python +y = a[newaxis, :] +shape(y) +``` + + + + + (1L, 3L) + + + +根据插入位置的不同,可以返回不同形状的数组: + + +```python +y = a[:, newaxis] +shape(y) +``` + + + + + (3L, 1L) + + + +插入多个新维度: + + +```python +y = a[newaxis, newaxis, :] +shape(y) +``` + + + + + (1L, 1L, 3L) + + + +## squeeze 方法去除多余的轴 + + +```python +a = arange(6) +a.shape = (2,1,3) +``` + + +```python +b = a.squeeze() +b.shape +``` + + + + + (2L, 3L) + + + +squeeze 返回一个将所有长度为1的维度去除的新数组。 + +## 数组转置 + +使用 `transpose` 返回数组的转置,本质上是将所有维度反过来: + + +```python +a +``` + + + + + array([[[0, 1, 2]], + + [[3, 4, 5]]]) + + + +对于二维数组,这相当于交换行和列: + + +```python +a.transpose() +``` + + + + + array([[[0, 3]], + + [[1, 4]], + + [[2, 5]]]) + + + +或者使用缩写属性: + + +```python +a.T +``` + + + + + array([[[0, 3]], + + [[1, 4]], + + [[2, 5]]]) + + + +注意: +- 对于复数数组,转置并不返回复共轭,只是单纯的交换轴的位置 +- 转置可以作用于多维数组 + + +```python +a = arange(60) +a +``` + + + + + array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59]) + + + + +```python +a.shape = 3,4,5 +a +``` + + + + + array([[[ 0, 1, 2, 3, 4], + [ 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14], + [15, 16, 17, 18, 19]], + + [[20, 21, 22, 23, 24], + [25, 26, 27, 28, 29], + [30, 31, 32, 33, 34], + [35, 36, 37, 38, 39]], + + [[40, 41, 42, 43, 44], + [45, 46, 47, 48, 49], + [50, 51, 52, 53, 54], + [55, 56, 57, 58, 59]]]) + + + + +```python +b = a.T +b.shape +``` + + + + + (5L, 4L, 3L) + + + +转置只是交换了轴的位置。 + +另一方面,转置返回的是对原数组的另一种view,所以改变转置会改变原来数组的值。 + + +```python +a = arange(6) +a.shape = (2,3) +a +``` + + + + + array([[0, 1, 2], + [3, 4, 5]]) + + + +修改转置: + + +```python +b = a.T +b[0,1] = 30 +``` + +原数组的值也改变: + + +```python +a +``` + + + + + array([[ 0, 1, 2], + [30, 4, 5]]) + + + +## 数组连接 + +有时我们需要将不同的数组按照一定的顺序连接起来: + + concatenate((a0,a1,...,aN), 
axis=0) + +注意,这些数组要用 `()` 包括到一个元组中去。 + +除了给定的轴外,这些数组其他轴的长度必须是一样的。 + + +```python +x = array([ + [0,1,2], + [10,11,12] + ]) +y = array([ + [50,51,52], + [60,61,62] + ]) +print x.shape +print y.shape +``` + + (2L, 3L) + (2L, 3L) + + +默认沿着第一维进行连接: + + +```python +z = concatenate((x,y)) +z +``` + + + + + array([[ 0, 1, 2], + [10, 11, 12], + [50, 51, 52], + [60, 61, 62]]) + + + + +```python +z.shape +``` + + + + + (4L, 3L) + + + +沿着第二维进行连接: + + +```python +z = concatenate((x,y), axis=1) +z +``` + + + + + array([[ 0, 1, 2, 50, 51, 52], + [10, 11, 12, 60, 61, 62]]) + + + + +```python +z.shape +``` + + + + + (2L, 6L) + + + +注意到这里 `x` 和 `y` 的形状是一样的,还可以将它们连接成三维的数组,但是 `concatenate` 不能提供这样的功能,不过可以这样: + + +```python +z = array((x,y)) +``` + + +```python +z.shape +``` + + + + + (2L, 2L, 3L) + + + +事实上,**Numpy**提供了分别对应这三种情况的函数: + +- vstack +- hstack +- dstack + + +```python +vstack((x, y)).shape +``` + + + + + (4L, 3L) + + + + +```python +hstack((x, y)).shape +``` + + + + + (2L, 6L) + + + + +```python +dstack((x, y)).shape +``` + + + + + (2L, 3L, 2L) + + + +## Flatten 数组 + +`flatten` 方法的作用是将多维数组转化为1维数组: + + +```python +a = array([[0,1], + [2,3]]) +b = a.flatten() +b +``` + + + + + array([0, 1, 2, 3]) + + + +返回的是数组的复制,因此,改变 `b` 并不会影响 `a` 的值: + + +```python +b[0] = 10 +print b +print a +``` + + [10 1 2 3] + [[0 1] + [2 3]] + + +## flat 属性 + +还可以使用数组自带的 `flat` 属性: + + +```python +a.flat +``` + + + + + + + + +`a.flat` 相当于返回了所有元组组成的一个迭代器: + + +```python +b = a.flat +``` + + +```python +b[0] +``` + + + + + 0 + + + +但此时修改 `b` 的值会影响 `a` : + + +```python +b[0] = 10 +print a +``` + + [[10 1] + [ 2 3]] + + + +```python +a.flat[:] +``` + + + + + array([10, 1, 2, 3]) + + + +## ravel 方法 + +除此之外,还可以使用 `ravel` 方法,`ravel` 使用高效的表示方式: + + +```python +a = array([[0,1], + [2,3]]) +b = a.ravel() +b +``` + + + + + array([0, 1, 2, 3]) + + + +修改 `b` 会改变 `a` : + + +```python +b[0] = 10 +a +``` + + + + + array([[10, 1], + [ 2, 3]]) + + + +但另一种情况下: + + +```python +a = array([[0,1], + [2,3]]) +aa = 
a.transpose() +b = aa.ravel() +b +``` + + + + + array([0, 2, 1, 3]) + + + + +```python +b[0] = 10 +``` + + +```python +aa +``` + + + + + array([[0, 2], + [1, 3]]) + + + + +```python +a +``` + + + + + array([[0, 1], + [2, 3]]) + + + +可以看到,在这种情况下,修改 `b` 并不会改变 `aa` 的值,原因是我们用来 `ravel` 的对象 `aa` 本身是 `a` 的一个view,在内存中并不连续,此时 `ravel` 返回的是一份复制。 + +## atleast_xd 函数 + +保证数组至少有 `x` 维: + + +```python +x = 1 +atleast_1d(x) +``` + + + + + array([1]) + + + + +```python +a = array([1,2,3]) +b = atleast_2d(a) +b.shape +``` + + + + + (1L, 3L) + + + + +```python +b +``` + + + + + array([[1, 2, 3]]) + + + + +```python +c = atleast_3d(b) +``` + + +```python +c.shape +``` + + + + + (1L, 3L, 1L) + + + +`x` 可以取值 1,2,3。 + +在**Scipy**库中,这些函数被用来保证输入满足一定的条件: + +|用法|**Scipy**中出现次数| +|-|-| +|value.flatten()
value.flat
value.ravel() | ~2000次 +| atleast_1d(value)
atleast_2d(value) |~700次 +| asarray(value) |~4000次 diff --git a/docs/03-numpy/03.08-diagonals.md b/docs/03-numpy/03.08-diagonals.md new file mode 100644 index 00000000..5b2dd509 --- /dev/null +++ b/docs/03-numpy/03.08-diagonals.md @@ -0,0 +1,132 @@ + +# 对角线 + +这里,使用与之前不同的导入方法: + + +```python +import numpy as np +``` + +使用numpy中的函数前,需要加上 `np.`: + + +```python +a = np.array([11,21,31,12,22,32,13,23,33]) +a.shape = 3,3 +a +``` + + + + + array([[11, 21, 31], + [12, 22, 32], + [13, 23, 33]]) + + + +查看它的对角线元素: + + +```python +a.diagonal() +``` + + + + + array([11, 22, 33]) + + + +可以使用偏移来查看它的次对角线,正数表示右移,负数表示左移: + + +```python +a.diagonal(offset=1) +``` + + + + + array([21, 32]) + + + + +```python +a.diagonal(offset=-1) +``` + + + + + array([12, 23]) + + + +可以使用花式索引来得到对角线: + + +```python +i = [0,1,2] +a[i, i] +``` + + + + + array([11, 22, 33]) + + + +可以更新对角线的值: + + +```python +a[i, i] = 2 +a +``` + + + + + array([[ 2, 21, 31], + [12, 2, 32], + [13, 23, 2]]) + + + +修改次对角线的值: + + +```python +i = np.array([0,1]) +a[i, i + 1] = 1 +a +``` + + + + + array([[ 2, 1, 31], + [12, 2, 1], + [13, 23, 2]]) + + + + +```python +a[i + 1, i] = -1 +a +``` + + + + + array([[ 2, 1, 31], + [-1, 2, 1], + [13, -1, 2]]) + + diff --git a/docs/03-numpy/03.09-data-to-&-from-string.md b/docs/03-numpy/03.09-data-to-&-from-string.md new file mode 100644 index 00000000..984c50b4 --- /dev/null +++ b/docs/03-numpy/03.09-data-to-&-from-string.md @@ -0,0 +1,91 @@ + +# 数组与字符串的转换 + +## tostring 方法 + + +```python +import numpy as np +``` + + +```python +a = np.array([[1,2], + [3,4]], + dtype = np.uint8) +``` + +转化为字符串: + + +```python +a.tostring() +``` + + + + + '\x01\x02\x03\x04' + + + +我们可以使用不同的顺序来转换字符串: + + +```python +a.tostring(order='F') +``` + + + + + '\x01\x03\x02\x04' + + + +这里使用了**Fortran**的格式,按照列来读数据。 + +## fromstring 函数 + +可以使用 `fromstring` 函数从字符串中读出数据,不过要指定类型: + + +```python +s = a.tostring() +a = np.fromstring(s, + dtype=np.uint8) +a +``` + + + + + array([1, 2, 3, 4], dtype=uint8) + + + 
+此时,返回的数组是一维的,需要重新设定维度: + + +```python +a.shape = 2,2 +a +``` + + + + + array([[1, 2], + [3, 4]], dtype=uint8) + + + +对于文本文件,推荐使用 +- `loadtxt` +- `genfromtxt` +- `savetxt` + +对于二进制文件,推荐使用 +- `save` +- `load` +- `savez` diff --git a/docs/03-numpy/03.10-array-attribute-&-method-overview-.md b/docs/03-numpy/03.10-array-attribute-&-method-overview-.md new file mode 100644 index 00000000..edfb15b2 --- /dev/null +++ b/docs/03-numpy/03.10-array-attribute-&-method-overview-.md @@ -0,0 +1,871 @@ + +# 数组属性方法总结 + +||作用| +|-|-| +|1|**基本属性** +|`a.dtype`|数组元素类型 `float32,uint8,...` +|`a.shape`|数组形状 `(m,n,o,...)` +|`a.size`|数组元素数 +|`a.itemsize`|每个元素占字节数 +|`a.nbytes`|所有元素占的字节 +|`a.ndim`|数组维度 +|2|**形状相关** +|`a.flat`|所有元素的迭代器 +|`a.flatten()`|返回一个1维数组的复制 +|`a.ravel()`|返回一个1维数组,高效 +|`a.resize(new_size)`|改变形状 +|`a.swapaxes(axis1, axis2)`|交换两个维度的位置 +|`a.transpose(*axes)`|交换所有维度的位置 +|`a.T`|转置,`a.transpose()` +|`a.squeeze()`| 去除所有长度为1的维度 +|3|**填充复制** +|`a.copy()`| 返回数组的一个复制 +|`a.fill(value)`| 将数组的元素设置为特定值 +|4|**转化** +|`a.tolist()`|将数组转化为列表 +|`a.tostring()`|转换为字符串 +|`a.astype(dtype)`|转化为指定类型 +|`a.byteswap(False)`|转换大小字节序 +|`a.view(type_or_dtype)`|生成一个使用相同内存,但使用不同的表示方法的数组 +|5|**复数** +|`a.imag`|虚部 +|`a.real`|实部 +|`a.conjugate()`|复共轭 +|`a.conj()`|复共轭(缩写) +|6|**保存** +|`a.dump(file)`|将二进制数据存在file中 +|`a.dumps()`|将二进制数据表示成字符串 +|`a.tofile(fid, sep="",format="%s")`|格式化ASCⅡ码写入文件 +|7|**查找排序** +|`a.nonzero()`|返回所有非零元素的索引 +|`a.sort(axis=-1)`|沿某个轴排序 +|`a.argsort(axis=-1)`|沿某个轴,返回按排序的索引 +|`a.searchsorted(b)`|返回将b中元素插入a后能保持有序的索引值 +|8|**元素数学操作** +|`a.clip(low, high)`|将数值限制在一定范围内 +|`a.round(decimals=0)`|近似到指定精度 +|`a.cumsum(axis=None)`|累加和 +|`a.cumprod(axis=None)`|累乘积 +|9|**约简操作** +|`a.sum(axis=None)`|求和 +|`a.prod(axis=None)`|求积 +|`a.min(axis=None)`|最小值 +|`a.max(axis=None)`|最大值 +|`a.argmin(axis=None)`|最小值索引 +|`a.argmax(axis=None)`|最大值索引 +|`a.ptp(axis=None)`|最大值减最小值 +|`a.mean(axis=None)`|平均值 +|`a.std(axis=None)`|标准差 +|`a.var(axis=None)`|方差 +|`a.any(axis=None)`|只要有一个不为0,返回真,逻辑或 
+|`a.all(axis=None)`|所有都不为0,返回真,逻辑与 + + +```python +from numpy import * +``` + +## 基本属性 + + +```python +a = array([[0, 1, 2, 3], [4, 5, 6, 7]]) +a +``` + + + + + array([[0, 1, 2, 3], + [4, 5, 6, 7]]) + + + +数组元素属性: + + +```python +a.dtype +``` + + + + + dtype('int32') + + + +形状: + + +```python +a.shape +``` + + + + + (2L, 4L) + + + +元素数目: + + +```python +a.size +``` + + + + + 8 + + + +元素占字节大小: + + +```python +a.itemsize +``` + + + + + 4 + + + +所有元素所占字节: + + +```python +a.nbytes +``` + + + + + 32 + + + +数据维度: + + +```python +a.ndim +``` + + + + + 2 + + + +## 形状相关 + + +```python +for row in a: + print row +``` + + [0 1 2 3] + [4 5 6 7] + + +所有元素的迭代器: + + +```python +for elt in a.flat: + print elt +``` + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + + +所有元素组成的一维数组,按照行排列: + + +```python +a.flatten() +``` + + + + + array([0, 1, 2, 3, 4, 5, 6, 7]) + + + + +```python +a.ravel() +``` + + + + + array([0, 1, 2, 3, 4, 5, 6, 7]) + + + +重新改变形状: + + +```python +a.resize((4,2)) +a +``` + + + + + array([[0, 1], + [2, 3], + [4, 5], + [6, 7]]) + + + +交换这两个轴的顺序: + + +```python +a.swapaxes(0,1) +``` + + + + + array([[0, 2, 4, 6], + [1, 3, 5, 7]]) + + + +转置: + + +```python +a.transpose() +``` + + + + + array([[0, 2, 4, 6], + [1, 3, 5, 7]]) + + + +转置: + + +```python +a.T +``` + + + + + array([[0, 2, 4, 6], + [1, 3, 5, 7]]) + + + + +```python +a2 = array([1,2,3]) +a2.shape +``` + + + + + (3L,) + + + + +```python +a2.resize((1,3,1)) +a2.shape +``` + + + + + (1L, 3L, 1L) + + + +去除长度为1的维度: + + +```python +a2 = a2.squeeze() +a2.shape +``` + + + + + (3L,) + + + +## 填充复制 + +复制: + + +```python +b = a.copy() +b +``` + + + + + array([[0, 1], + [2, 3], + [4, 5], + [6, 7]]) + + + +复制不影响原来的数组: + + +```python +b[0][0] = -1 +b # First value changed +``` + + + + + array([[-1, 1], + [ 2, 3], + [ 4, 5], + [ 6, 7]]) + + + + +```python +a # original not changed because b is a copy +``` + + + + + array([[0, 1], + [2, 3], + [4, 5], + [6, 7]]) + + + +填充: + + +```python +b.fill(4) +b +``` + + + + + array([[4, 4], + [4, 
4], + [4, 4], + [4, 4]]) + + + +## 转化 + +转化为列表: + + +```python +a.tolist() +``` + + + + + [[0, 1], [2, 3], [4, 5], [6, 7]] + + + +转化为字符串: + + +```python +a.tostring() +``` + + + + + '\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\x06\x00\x00\x00\x07\x00\x00\x00' + + + +改变数组元素类型: + + +```python +a.astype(float) +``` + + + + + array([[ 0., 1.], + [ 2., 3.], + [ 4., 5.], + [ 6., 7.]]) + + + + +```python +b = a.copy() +b.byteswap(False) +``` + + + + + array([[ 0, 16777216], + [ 33554432, 50331648], + [ 67108864, 83886080], + [100663296, 117440512]]) + + + +将它看成16位整数: + + +```python +a.view(dtype=int16) +``` + + + + + array([[0, 0, 1, 0], + [2, 0, 3, 0], + [4, 0, 5, 0], + [6, 0, 7, 0]], dtype=int16) + + + +## 复数 + +实部: + + +```python +b = array([1+2j, 3+4j, 5+6j]) +b.real +``` + + + + + array([ 1., 3., 5.]) + + + +虚部: + + +```python +b.imag +``` + + + + + array([ 2., 4., 6.]) + + + +共轭: + + +```python +b.conj() +``` + + + + + array([ 1.-2.j, 3.-4.j, 5.-6.j]) + + + + +```python +b.conjugate() +``` + + + + + array([ 1.-2.j, 3.-4.j, 5.-6.j]) + + + +## 保存 + +保存成文本: + + +```python +a.dump("file.txt") +``` + +字符串: + + +```python +a.dumps() +``` + + + + + '\x80\x02cnumpy.core.multiarray\n_reconstruct\nq\x01cnumpy\nndarray\nq\x02K\x00\x85U\x01b\x87Rq\x03(K\x01\x8a\x01\x04\x8a\x01\x02\x86cnumpy\ndtype\nq\x04U\x02i4K\x00K\x01\x87Rq\x05(K\x03U\x01 + + + + +![png](output_25_2.png) + + +事实上,`x, y` 中有很多冗余的元素,这里提供了一个 `sparse` 的选项: + + +```python +x_ticks = np.linspace(-1, 1, 5) +y_ticks = np.linspace(-1, 1, 5) + +x, y = np.meshgrid(x_ticks, y_ticks, sparse=True) +``` + + +```python +x +``` + + + + + array([[-1. , -0.5, 0. , 0.5, 1. ]]) + + + + +```python +y +``` + + + + + array([[-1. ], + [-0.5], + [ 0. ], + [ 0.5], + [ 1. 
]]) + + + +在这个选项下,`x, y` 变成了单一的行向量和列向量。 + +但这并不影响结果: + + +```python +x_ticks = np.linspace(-10, 10, 51) +y_ticks = np.linspace(-10, 10, 51) + +x, y = np.meshgrid(x_ticks, y_ticks, sparse=True) + +z = f(x, y) + +fig = plt.figure() +ax = fig.add_subplot(111, projection='3d') +ax.plot_surface(x, y, z, + rstride=1, cstride=1, + cmap=cm.YlGnBu_r) +ax.set_xlabel('x') +ax.set_ylabel('y') +ax.set_zlabel('z') +``` + + c:\Miniconda\lib\site-packages\IPython\kernel\__main__.py:9: RuntimeWarning: invalid value encountered in divide + + + + + + + + + + +![png](output_31_2.png) + + +`meshgrid` 可以设置轴排列的先后顺序: +- 默认为 `indexing='xy'` 即笛卡尔坐标,对于2维数组,返回行向量 `x` 和列向量 `y` +- 或者使用 `indexing='ij'` 即矩阵坐标,对于2维数组,返回列向量 `x` 和行向量 `y`。 + +## ogrid , mgrid + +**Matlab**中有 `meshgrid` 的用法: + + meshgrid(-1:.5:1, -1:.5:1) + +**Numpy**的 `meshgrid` 并不支持这样的用法,但我们可以使用 `ogrid / mgrid` 来实现类似这样的用法。 + +`ogrid` 与 `mgrid` 的区别在于: +- `ogrid` 相当于 `meshgrid(indexing='ij', sparse=True)` +- `mgrid` 相当于 `meshgrid(indexing='ij', sparse=False)` + + +```python +x, y = np.ogrid[-1:1:.5, -1:1:.5] +``` + + +```python +x +``` + + + + + array([[-1. ], + [-0.5], + [ 0. ], + [ 0.5]]) + + + + +```python +y +``` + + + + + array([[-1. , -0.5, 0. , 0.5]]) + + + +注意: +- 这里使用的是中括号 +- **Matlab** 使用的是 `start:step:end` 的表示,**Numpy** 使用的是 `start:end:step` 的表示 +- 这里的结果不包括 `end` 的值 + +为了包含 `end` 的值,我们可以使用这样的技巧: + + +```python +x, y = np.ogrid[-1:1:5j, -1:1:5j] +``` + + +```python +x, y +``` + + + + + (array([[-1. ], + [-0.5], + [ 0. ], + [ 0.5], + [ 1. ]]), array([[-1. , -0.5, 0. , 0.5, 1. 
]])) + + + +我们在 `step` 的位置传入一个复数 `5j` ,表示我们需要一个 `5` 个值的数组,此时返回值就会包含 `end` 的值。 + +重复之前的画图: + + +```python +# exchange here +y, x = np.ogrid[-10:10:51j, -10:10:51j] + +z = f(x, y) + +fig = plt.figure() +ax = fig.add_subplot(111, projection='3d') +ax.plot_surface(x, y, z, + rstride=1, cstride=1, + cmap=cm.YlGnBu_r) +ax.set_xlabel('x') +ax.set_ylabel('y') +ax.set_zlabel('z') +``` + + c:\Miniconda\lib\site-packages\IPython\kernel\__main__.py:9: RuntimeWarning: invalid value encountered in divide + + + + + + + + + + +![png](output_43_2.png) + + +这里,我们交换了 `x, y` 输出值的顺序。 + +## r`_` , c`_` + +我们可以使用 `r_ / c_` 来产生行向量或者列向量。 + +使用切片产生: + + +```python +np.r_[0:1:.1] +``` + + + + + array([ 0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]) + + + +复数步长制定数组长度: + + +```python +np.r_[0:1:5j] +``` + + + + + array([ 0. , 0.25, 0.5 , 0.75, 1. ]) + + + +连接多个序列,产生数组: + + +```python +np.r_[(3,22,11), 4.0, [15, 6]] +``` + + + + + array([ 3., 22., 11., 4., 15., 6.]) + + + +列向量: + + +```python +np.c_[1:3:5j] +``` + + + + + array([[ 1. ], + [ 1.5], + [ 2. ], + [ 2.5], + [ 3. 
]]) + + + +## ones , zeros + +```python +ones(shape, dtype=float64) +zeros(shape, dtype=float64) +``` + +产生一个制定形状的全 `0` 或全 `1` 的数组,还可以制定数组类型: + + +```python +np.zeros(3) +``` + + + + + array([ 0., 0., 0.]) + + + + +```python +np.ones([2,3], dtype=np.float32) +``` + + + + + array([[ 1., 1., 1.], + [ 1., 1., 1.]], dtype=float32) + + + +产生一个全是 `5` 的数组: + + +```python +np.ones([2,3]) * 5 +``` + + + + + array([[ 5., 5., 5.], + [ 5., 5., 5.]]) + + + +## empty + + empty(shape, dtype=float64, order='C') + +也可以使用 `empty` 方法产生一个制定大小的数组(数组所指向的内存未被初始化,所以值随机),再用 `fill` 方法填充: + + +```python +a = np.empty(2) +a +``` + + + + + array([-0.03412165, 0.05516321]) + + + + +```python +a.fill(5) +a +``` + + + + + array([ 5., 5.]) + + + +另一种替代方法使用索引,不过速度会稍微慢一些: + + +```python +a[:] = 5 +a +``` + + + + + array([ 5., 5.]) + + + +## empty`_`like, ones`_`like, zeros`_`like + + empty_like(a) + ones_like(a) + zeros_like(a) + +产生一个跟 `a` 大小一样,类型一样的对应数组。 + + +```python +a = np.arange(0, 10, 2.5) +a +``` + + + + + array([ 0. , 2.5, 5. 
, 7.5]) + + + + +```python +np.empty_like(a) +``` + + + + + array([ 0., 0., 0., 0.]) + + + + +```python +np.zeros_like(a) +``` + + + + + array([ 0., 0., 0., 0.]) + + + + +```python +np.ones_like(a) +``` + + + + + array([ 1., 1., 1., 1.]) + + + +## identity + + identity(n, dtype=float64) +产生一个 `n` 乘 `n` 的单位矩阵: + + +```python +np.identity(3) +``` + + + + + array([[ 1., 0., 0.], + [ 0., 1., 0.], + [ 0., 0., 1.]]) + + diff --git a/docs/03-numpy/03.12-matrix-object.md b/docs/03-numpy/03.12-matrix-object.md new file mode 100644 index 00000000..a6ac0025 --- /dev/null +++ b/docs/03-numpy/03.12-matrix-object.md @@ -0,0 +1,117 @@ + +# 矩阵 + +使用 `mat` 方法将 `2` 维数组转化为矩阵: + + +```python +import numpy as np +a = np.array([[1,2,4], + [2,5,3], + [7,8,9]]) +A = np.mat(a) +A +``` + + + + + matrix([[1, 2, 4], + [2, 5, 3], + [7, 8, 9]]) + + + +也可以使用 **Matlab** 的语法传入一个字符串来生成矩阵: + + +```python +A = np.mat('1,2,4;2,5,3;7,8,9') +A +``` + + + + + matrix([[1, 2, 4], + [2, 5, 3], + [7, 8, 9]]) + + + +利用分块创造新的矩阵: + + +```python +a = np.array([[ 1, 2], + [ 3, 4]]) +b = np.array([[10,20], + [30,40]]) + +np.bmat('a,b;b,a') +``` + + + + + matrix([[ 1, 2, 10, 20], + [ 3, 4, 30, 40], + [10, 20, 1, 2], + [30, 40, 3, 4]]) + + + +矩阵与向量的乘法: + + +```python +x = np.array([[1], [2], [3]]) +x +``` + + + + + array([[1], + [2], + [3]]) + + + + +```python +A * x +``` + + + + + matrix([[17], + [21], + [50]]) + + + +`A.I` 表示 `A` 矩阵的逆矩阵: + + +```python +print A * A.I +``` + + [[ 1.00000000e+00 0.00000000e+00 0.00000000e+00] + [ 0.00000000e+00 1.00000000e+00 2.08166817e-17] + [ 2.22044605e-16 -8.32667268e-17 1.00000000e+00]] + + +矩阵指数表示矩阵连乘: + + +```python +print A ** 4 +``` + + [[ 6497 9580 9836] + [ 7138 10561 10818] + [18434 27220 27945]] + diff --git a/docs/03-numpy/03.13-general-functions.md b/docs/03-numpy/03.13-general-functions.md new file mode 100644 index 00000000..1599fa8c --- /dev/null +++ b/docs/03-numpy/03.13-general-functions.md @@ -0,0 +1,295 @@ + +# 一般函数 + + +```python +import numpy as np +``` + 
+## 三角函数 + + sin(x) + cos(x) + tan(x) + sinh(x) + cosh(x) + tanh(x) + arccos(x) + arctan(x) + arcsin(x) + arccosh(x) + arctanh(x) + arcsinh(x) + arctan2(x,y) + +`arctan2(x,y)` 返回 `arctan(x/y)` 。 + +## 向量操作 + + dot(x,y) + inner(x,y) + cross(x,y) + vdot(x,y) + outer(x,y) + kron(x,y) + tensordot(x,y[,axes]) + +## 其他操作 + + exp(x) + log(x) + log10(x) + sqrt(x) + absolute(x) + conjugate(x) + negative(x) + ceil(x) + floor(x) + fabs(x) + hypot(x,y) + fmod(x,y) + maximum(x,y) + minimum(x,y) + +`hypot` 返回对应点 `(x,y)` 到原点的距离。 + + +```python +x = np.array([1,2,3]) +y = np.array([4,5,6]) +np.hypot(x,y) +``` + + + + + array([ 4.12310563, 5.38516481, 6.70820393]) + + + +## 类型处理 + + iscomplexobj + iscomplex + isrealobj + isreal + imag + real + real_if_close + isscalar + isneginf + isposinf + isinf + isfinite + isnan + nan_to_num + common_type + typename + +正无穷: + + +```python +np.inf +``` + + + + + inf + + + +负无穷: + + +```python +-np.inf +``` + + + + + -inf + + + +非法值(Not a number): + + +```python +np.nan +``` + + + + + nan + + + +检查是否为无穷: + + +```python +np.isinf(1.0) +``` + + + + + False + + + + +```python +np.isinf(np.inf) +``` + + + + + True + + + + +```python +np.isinf(-np.inf) +``` + + + + + True + + + +非法值: + + +```python +np.array([0]) / 0.0 +``` + + c:\Miniconda\lib\site-packages\IPython\kernel\__main__.py:1: RuntimeWarning: invalid value encountered in divide + if __name__ == '__main__': + + + + + + array([ nan]) + + + +这并不会报错,而是返回一个非法值。 + +只有 `0/0` 会得到 `nan`,非0值除以0会得到无穷: + + +```python +a = np.arange(5.0) +b = a / 0.0 +b +``` + + c:\Miniconda\lib\site-packages\IPython\kernel\__main__.py:2: RuntimeWarning: divide by zero encountered in divide + from IPython.kernel.zmq import kernelapp as app + c:\Miniconda\lib\site-packages\IPython\kernel\__main__.py:2: RuntimeWarning: invalid value encountered in divide + from IPython.kernel.zmq import kernelapp as app + + + + + + array([ nan, inf, inf, inf, inf]) + + + +`nan` 与任何数(包括 `nan` 自身)进行 `==` 比较都是 `False`: + + +```python +b == 
np.nan +``` + + + + + 
array([False, False, False, False, False], dtype=bool) + + + +想要找出 `nan` 值需要使用 `isnan`: + + +```python +np.isnan(b) +``` + + + + + array([ True, False, False, False, False], dtype=bool) + + + +## 修改形状 + + atleast_1d + atleast_2d + atleast_3d + expand_dims + apply_over_axes + apply_along_axis + hstack + vstack + dstack + column_stack + hsplit + vsplit + dsplit + split + squeeze + +## 其他有用函数 + + fix + mod + amax + amin + ptp + sum + cumsum + prod + cumprod + diff + angle + + unwrap + sort_complex + trim_zeros + fliplr + flipud + rot90 + diag + eye + select + extract + insert + + roots + poly + any + all + disp + unique + nansum + nanmax + nanargmax + nanargmin + nanmin + +`nan` 开头的函数会进行相应的操作,但是忽略 `nan` 值。 diff --git a/docs/03-numpy/03.14-vectorizing-functions.md b/docs/03-numpy/03.14-vectorizing-functions.md new file mode 100644 index 00000000..f51df9e8 --- /dev/null +++ b/docs/03-numpy/03.14-vectorizing-functions.md @@ -0,0 +1,110 @@ + +# 向量化函数 + +自定义的 `sinc` 函数: + + +``` +import numpy as np + +def sinc(x): + if x == 0.0: + return 1.0 + else: + w = np.pi * x + return np.sin(w) / w +``` + +作用于单个数值: + + +``` +sinc(0.0) +``` + + + + + 1.0 + + + + +``` +sinc(3.0) +``` + + + + + 3.8981718325193755e-17 + + + +但这个函数不能作用于数组: + + +``` +x = np.array([1,2,3]) +sinc(x) +``` + + + --------------------------------------------------------------------------- + + ValueError Traceback (most recent call last) + + in () + 1 x = np.array([1,2,3]) + ----> 2 sinc(x) + + + in sinc(x) + 2 + 3 def sinc(x): + ----> 4 if x == 0.0: + 5 return 1.0 + 6 else: + + + ValueError: The truth value of an array with more than one element is ambiguous. 
Use a.any() or a.all() + + +可以使用 `numpy` 的 `vectorize` 将函数 `sinc` 向量化,产生一个新的函数: + + +``` +vsinc = np.vectorize(sinc) +vsinc(x) +``` + + + + + array([ 3.89817183e-17, -3.89817183e-17, 3.89817183e-17]) + + + +其作用是为 `x` 中的每一个值调用 `sinc` 函数: + + +``` +import matplotlib.pyplot as plt +%matplotlib inline + +x = np.linspace(-5,5,101) +plt.plot(x, vsinc(x)) +``` + + + + + [] + + + + +![png](output_11_1.png) + + +因为这样的用法涉及大量的函数调用,因此,向量化函数的效率并不高。 diff --git a/docs/03-numpy/03.15-binary-operators.md b/docs/03-numpy/03.15-binary-operators.md new file mode 100644 index 00000000..99c28e6a --- /dev/null +++ b/docs/03-numpy/03.15-binary-operators.md @@ -0,0 +1,223 @@ + +# 二元运算 + + +```python +import numpy as np +``` + +## 四则运算 + +运算|函数 +--- | --- +`a + b` | `add(a,b)` +`a - b` | `subtract(a,b)` +`a * b` | `multiply(a,b)` +`a / b` | `divide(a,b)` +`a ** b` | `power(a,b)` +`a % b` | `remainder(a,b)` + +以乘法为例,数组与标量相乘,相当于数组的每个元素乘以这个标量: + + +```python +a = np.array([1,2]) +a * 3 +``` + + + + + array([3, 6]) + + + +数组逐元素相乘: + + +```python +a = np.array([1,2]) +b = np.array([3,4]) +a * b +``` + + + + + array([3, 8]) + + + +使用函数: + + +```python +np.multiply(a, b) +``` + + + + + array([3, 8]) + + + +事实上,函数还可以接受第三个参数,表示将结果存入第三个参数中: + + +```python +np.multiply(a, b, a) +``` + + + + + array([3, 8]) + + + + +```python +a +``` + + + + + array([3, 8]) + + + +## 比较和逻辑运算 + +运算|函数 +--- | --- +`==` | `equal` +`!=` | `not_equal` +`>` | `greater` +`>=` | `greater_equal` +`<` | `less` +`<=` | `less_equal` +| `logical_and` +| `logical_or` +| `logical_xor` +| `logical_not` +`&` | `bitwise_and` +`\|` | `bitwise_or` +`^` | `bitwise_xor` +`~` | `invert` +`>>` | `right_shift` +`<<` | `left_shift` + +等于操作也是逐元素比较的: + + +```python +a = np.array([[1,2,3,4], + [2,3,4,5]]) +b = np.array([[1,2,5,4], + [1,3,4,5]]) +a == b +``` + + + + + array([[ True, True, False, True], + [False, True, True, True]], dtype=bool) + + + +这意味着,如果我们在条件中要判断两个数组是否一样时,不能直接使用 + + if a == b: + +而要使用: + + if all(a==b): + +对于浮点数,由于存在精度问题,使用函数 
`allclose` 会更好: + + if allclose(a,b): + +`logical_and` 也是逐元素的 `and` 操作: + + +```python +a = np.array([0,1,2]) +b = np.array([0,10,0]) + +np.logical_and(a, b) +``` + + + + + array([False, True, False], dtype=bool) + + + +`0` 被认为是 `False`,非零则是 `True`。 + +比特操作: + + +```python +a = np.array([1,2,4,8]) +b = np.array([16,32,64,128]) + +a | b +``` + + + + + array([ 17, 34, 68, 136]) + + + +取反: + + +```python +a = np.array([1,2,3,4], np.uint8) +~a +``` + + + + + array([254, 253, 252, 251], dtype=uint8) + + + +左移: + + +```python +a << 3 +``` + + + + + array([ 8, 16, 24, 32], dtype=uint8) + + + +要注意的是 `&` 的运算优先于比较运算如 `>` 等,所以必要时候需要加上括号: + + +```python +a = np.array([1,2,4,8]) +b = np.array([16,32,64,128]) + +(a > 3) & (b < 100) +``` + + + + + array([False, False, True, False], dtype=bool) + + diff --git a/docs/03-numpy/03.16-universal-functions.md b/docs/03-numpy/03.16-universal-functions.md new file mode 100644 index 00000000..c470f885 --- /dev/null +++ b/docs/03-numpy/03.16-universal-functions.md @@ -0,0 +1,313 @@ + +# ufunc 对象 + +**Numpy** 有两种基本对象:`ndarray (N-dimensional array object)` 和 `ufunc (universal function object)`。`ndarray` 是存储单一数据类型的多维数组,而 `ufunc` 则是能够对数组进行处理的函数。 + +例如,我们之前所接触到的二元操作符对应的 **Numpy** 函数,如 `add`,就是一种 `ufunc` 对象,它可以作用于数组的每个元素。 + + +```python +import numpy as np +``` + + +```python +a = np.array([0,1,2]) +b = np.array([2,3,4]) + +np.add(a, b) +``` + + + + + array([2, 4, 6]) + + + +查看支持的方法: + + +```python +dir(np.add) +``` + + + + + ['__call__', + '__class__', + '__delattr__', + '__doc__', + '__format__', + '__getattribute__', + '__hash__', + '__init__', + '__name__', + '__new__', + '__reduce__', + '__reduce_ex__', + '__repr__', + '__setattr__', + '__sizeof__', + '__str__', + '__subclasshook__', + 'accumulate', + 'at', + 'identity', + 'nargs', + 'nin', + 'nout', + 'ntypes', + 'outer', + 'reduce', + 'reduceat', + 'signature', + 'types'] + + + +除此之外,大部分能够作用于数组的数学函数如三角函数等,都是 `ufunc` 对象。 + +特别地,对于二元操作符所对应的 `ufunc` 对象,支持以下方法: + +## reduce 方法 + + op.reduce(a) 
+将`op`沿着某个轴应用,使得数组 `a` 的维数降低一维。 + +add 作用到一维数组上相当于求和: + +$$ +\begin{align} +y & = add.reduce(a) \\ +& = a[0] + a[1] + ... + a[N-1] \\ +& = \sum_{n=0}^{N-1} a[n] +\end{align} +$$ + + +```python +a = np.array([1,2,3,4]) + +np.add.reduce(a) +``` + + + + + 10 + + + +多维数组默认只按照第一维进行运算: + + +```python +a = np.array([[1,2,3],[4,5,6]]) + +np.add.reduce(a) +``` + + + + + array([5, 7, 9]) + + + +指定维度: + + +```python +np.add.reduce(a, 1) +``` + + + + + array([ 6, 15]) + + + +作用于字符串: + + +```python +a = np.array(['ab', 'cd', 'ef'], np.object) + +np.add.reduce(a) +``` + + + + + 'abcdef' + + + +逻辑运算: + + +```python +a = np.array([1,1,0,1]) + +np.logical_and.reduce(a) +``` + + + + + False + + + + +```python +np.logical_or.reduce(a) +``` + + + + + True + + + +## accumulate 方法 + + op.accumulate(a) + +`accumulate` 可以看成保存 `reduce` 每一步的结果所形成的数组。 + +$$ +\begin{align} +y & = add.accumulate(a) \\ +& = \left[\sum_{n=0}^{0} a[n], \sum_{n=0}^{1} a[n], ..., \sum_{n=0}^{N-1} a[n]\right] +\end{align} +$$ + +与之前类似: + + +```python +a = np.array([1,2,3,4]) + +np.add.accumulate(a) +``` + + + + + array([ 1, 3, 6, 10]) + + + + +```python +a = np.array(['ab', 'cd', 'ef'], np.object) + +np.add.accumulate(a) +``` + + + + + array(['ab', 'abcd', 'abcdef'], dtype=object) + + + + +```python +a = np.array([1,1,0,1]) + +np.logical_and.accumulate(a) +``` + + + + + array([ True, True, False, False], dtype=bool) + + + + +```python +np.logical_or.accumulate(a) +``` + + + + + array([ True, True, True, True], dtype=bool) + + + +## reduceat 方法 + + op.reduceat(a, indices) + +`reduceat` 方法将操作符运用到指定的下标上,返回一个与 `indices` 大小相同的数组: + +$$ +\begin{align} +y & = add.reduceat(a, indices) \\ +& = \left[\sum_{n=indices[0]}^{indices[1]-1} a[n], \sum_{n=indices[1]}^{indices[2]-1} a[n], ..., \sum_{n=indices[-1]}^{N-1} a[n]\right] +\end{align} +$$ + + +```python +a = np.array([0, 10, 20, 30, 40, 50]) +indices = np.array([1,4]) + +np.add.reduceat(a, indices) +``` + + + + + array([60, 90]) + + + +这里,`indices` 为 `[1, 4]`,所以 `60` 
表示从下标1(包括)加到下标4(不包括)的结果,`90` 表示从下标4(包括)加到结尾的结果。 + +## outer 方法 + + op.outer(a, b) + +对于 `a` 中每个元素,将 `op` 运用到它和 `b` 的每一个元素上所得到的结果: + + +```python +a = np.array([0,1]) +b = np.array([1,2,3]) + +np.add.outer(a, b) +``` + + + + + array([[1, 2, 3], + [2, 3, 4]]) + + + +注意有顺序的区别: + + +```python +np.add.outer(b, a) +``` + + + + + array([[1, 2], + [2, 3], + [3, 4]]) + + diff --git a/docs/03-numpy/03.17-choose.md b/docs/03-numpy/03.17-choose.md new file mode 100644 index 00000000..3ba51125 --- /dev/null +++ b/docs/03-numpy/03.17-choose.md @@ -0,0 +1,129 @@ + +# choose 函数实现条件筛选 + +对于数组,我们有时候需要进行类似 `switch` 和 `case` 进行条件选择,此时使用 choose 函数十分方便: + + +```python +import numpy as np +``` + + +```python +control = np.array([[1,0,1], + [2,1,0], + [1,2,2]]) + +np.choose(control, [10, 11, 12]) +``` + + + + + array([[11, 10, 11], + [12, 11, 10], + [11, 12, 12]]) + + + +在上面的例子中,`choose` 将 `0,1,2` 对应的值映射为了 `10, 11, 12`,这里的 `0,1,2` 表示对应的下标。 + +事实上, `choose` 不仅仅能接受下标参数,还可以接受下标所在的位置: + + +```python +i0 = np.array([[0,1,2], + [3,4,5], + [6,7,8]]) +i2 = np.array([[20,21,22], + [23,24,25], + [26,27,28]]) +control = np.array([[1,0,1], + [2,1,0], + [1,2,2]]) + +np.choose(control, [i0, 10, i2]) +``` + + + + + array([[10, 1, 10], + [23, 10, 5], + [10, 27, 28]]) + + + +这里,`control` 传入第一个 `1` 对应的是 10,传入的第一个 `0` 对应于 `i0` 相应位置的值即 `1`,剩下的以此类推。 + +下面的例子将数组中所有小于 `10` 的值变成了 `10`。 + + +```python +a = np.array([[ 0, 1, 2], + [10,11,12], + [20,21,22]]) + +a < 10 +``` + + + + + array([[ True, True, True], + [False, False, False], + [False, False, False]], dtype=bool) + + + + +```python +np.choose(a < 10, (a, 10)) +``` + + + + + array([[10, 10, 10], + [10, 11, 12], + [20, 21, 22]]) + + + +下面的例子将数组中所有小于 10 的值变成了 10,大于 15 的值变成了 15。 + + +```python +a = np.array([[ 0, 1, 2], + [10,11,12], + [20,21,22]]) + +lt = a < 10 +gt = a > 15 + +choice = lt + 2 * gt +choice +``` + + + + + array([[1, 1, 1], + [0, 0, 0], + [2, 2, 2]]) + + + + +```python +np.choose(choice, (a, 10, 15)) +``` + + + + + array([[10, 10, 10], + [10, 
11, 12], + [15, 15, 15]]) + + diff --git a/docs/03-numpy/03.18-array-broadcasting.md b/docs/03-numpy/03.18-array-broadcasting.md new file mode 100644 index 00000000..70b7160e --- /dev/null +++ b/docs/03-numpy/03.18-array-broadcasting.md @@ -0,0 +1,243 @@ + +# 数组广播机制 + + +```python +import numpy as np +``` + +正常的加法: + + +```python +a = np.array([[ 0, 0, 0], + [10,10,10], + [20,20,20], + [30,30,30]]) +b = np.array([[ 0, 1, 2], + [ 0, 1, 2], + [ 0, 1, 2], + [ 0, 1, 2]]) +a + b +``` + + + + + array([[ 0, 1, 2], + [10, 11, 12], + [20, 21, 22], + [30, 31, 32]]) + + + +将 `b` 的值变成一维的 `[0,1,2]` 之后的加法: + + +```python +b = np.array([0,1,2]) + +a + b +``` + + + + + array([[ 0, 1, 2], + [10, 11, 12], + [20, 21, 22], + [30, 31, 32]]) + + + +结果一样,虽然两个数组的维数不一样,但是 **Numpy** 检测到 `b` 的维度与 `a` 的维度匹配,所以将 `b` 扩展为之前的形式,得到相同的形状。 + +对于更高维度,这样的扩展依然有效。 + +如果我们再将 `a` 变成一个列向量呢? + + +```python +a = np.array([0,10,20,30]) +a.shape = 4,1 +a +``` + + + + + array([[ 0], + [10], + [20], + [30]]) + + + + +```python +b +``` + + + + + array([0, 1, 2]) + + + + +```python +a + b +``` + + + + + array([[ 0, 1, 2], + [10, 11, 12], + [20, 21, 22], + [30, 31, 32]]) + + + +可以看到,虽然两者的维度并不相同,但是**Numpy**还是根据两者的维度,自动将它们进行扩展然后进行计算。 + +对于 **Numpy** 来说,维度匹配当且仅当: + +- 维度相同 +- 有一个的维度是1 + +匹配会从最后一维开始进行,直到某一个的维度全部匹配为止,因此对于以下情况,**Numpy** 都会进行相应的匹配: + +A|B|Result +---|---|--- +3d array: 256 x 256 x 3 | 1d array: 3 | 3d array: 256 x 256 x 3 +4d array: 8 x 1 x 6 x 1 | 3d array: 7 x 1 x 5 | 4d array: 8 x 7 x 6 x 5 +3d array: 5 x 4 x 3 | 1d array: 1 | 3d array: 5 x 4 x 3 +3d array: 15 x 4 x 13 | 3d array: 15 x 1 x 13 | 3d array: 15 x 4 x 13 +2d array: 4 x 1 | 1d array: 3 | 2d array: 4 x 3 + +匹配成功后,**Numpy** 会进行运算得到相应的结果。 + +当然,如果相应的维度不匹配,那么**Numpy**会报错: + + +```python +a = np.array([0,10,20,30]) +a.shape +``` + + + + + (4L,) + + + + +```python +b.shape +``` + + + + + (3L,) + + + + +```python +a + b +``` + + + --------------------------------------------------------------------------- + + ValueError Traceback (most recent call 
last) + + in () + ----> 1 a + b + + + ValueError: operands could not be broadcast together with shapes (4,) (3,) + + +将 `a` 转换为列向量,还是可以计算出结果: + + +```python +a[:, np.newaxis] + b +``` + + + + + array([[ 0, 1, 2], + [10, 11, 12], + [20, 21, 22], + [30, 31, 32]]) + + + +## 例子 + + +```python +x = np.linspace(-.5,.5, 21) +``` + + +```python +y = x[:, np.newaxis] +``` + + +```python +x.shape +``` + + + + + (21L,) + + + + +```python +y.shape +``` + + + + + (21L, 1L) + + + +先形成一个 21 乘 21 的网格,再计算网格到原点的距离: + + +```python +radius = np.sqrt(x ** 2 + y ** 2) +``` + + +```python +import matplotlib.pyplot as plt +%matplotlib inline + +plt.imshow(radius) +``` + + + + + + + + + +![png](output_23_1.png) + diff --git a/docs/03-numpy/03.19-reading-and-writing-arrays.md b/docs/03-numpy/03.19-reading-and-writing-arrays.md new file mode 100644 index 00000000..c40c8f4f --- /dev/null +++ b/docs/03-numpy/03.19-reading-and-writing-arrays.md @@ -0,0 +1,632 @@ + +# 数组读写 + +## 从文本中读取数组 + + +```python +import numpy as np +``` + +### 空格(制表符)分割的文本 + +假设我们有这样的一个空白分割的文件: + + +```python +%%writefile myfile.txt +2.1 2.3 3.2 1.3 3.1 +6.1 3.1 4.2 2.3 1.8 +``` + + Writing myfile.txt + + +为了生成数组,我们首先将数据转化成一个列表组成的列表,再将这个列表转换为数组: + + +```python +data = [] + +with open('myfile.txt') as f: + # 每次读一行 + for line in f: + fileds = line.split() + row_data = [float(x) for x in fileds] + data.append(row_data) + +data = np.array(data) +``` + + +```python +data +``` + + + + + array([[ 2.1, 2.3, 3.2, 1.3, 3.1], + [ 6.1, 3.1, 4.2, 2.3, 1.8]]) + + + +不过,更简便的是使用 `loadtxt` 方法: + + +```python +data = np.loadtxt('myfile.txt') +data +``` + + + + + array([[ 2.1, 2.3, 3.2, 1.3, 3.1], + [ 6.1, 3.1, 4.2, 2.3, 1.8]]) + + + +### 逗号分隔文件 + + +```python +%%writefile myfile.txt +2.1, 2.3, 3.2, 1.3, 3.1 +6.1, 3.1, 4.2, 2.3, 1.8 +``` + + Overwriting myfile.txt + + +对于逗号分隔的文件(通常为`.csv`格式),我们可以稍微修改之前繁琐的过程,将 `split` 的参数变成 `','`即可。 + +不过,`loadtxt` 函数也可以读这样的文件,只需要指定分隔符的参数即可: + + +```python +data = np.loadtxt('myfile.txt', delimiter=',') 
+data +``` + + + + + array([[ 2.1, 2.3, 3.2, 1.3, 3.1], + [ 6.1, 3.1, 4.2, 2.3, 1.8]]) + + + +### loadtxt 函数 + + loadtxt(fname, dtype=, + comments='#', delimiter=None, + converters=None, skiprows=0, + usecols=None, unpack=False, ndmin=0) + +`loadtxt` 有很多可选参数,其中 `delimiter` 就是刚才用到的分隔符参数。 + +`skiprows` 参数表示忽略开头的行数,可以用来读写含有标题的文本 + + +```python +%%writefile myfile.txt +X Y Z MAG ANG +2.1 2.3 3.2 1.3 3.1 +6.1 3.1 4.2 2.3 1.8 +``` + + Overwriting myfile.txt + + + +```python +np.loadtxt('myfile.txt', skiprows=1) +``` + + + + + array([[ 2.1, 2.3, 3.2, 1.3, 3.1], + [ 6.1, 3.1, 4.2, 2.3, 1.8]]) + + + +此外,有一个功能更为全面的 `genfromtxt` 函数,能处理更多的情况,但相应的速度和效率会慢一些。 + + genfromtxt(fname, dtype=, comments='#', delimiter=None, + skiprows=0, skip_header=0, skip_footer=0, converters=None, + missing='', missing_values=None, filling_values=None, usecols=None, + names=None, excludelist=None, deletechars=None, replace_space='_', + autostrip=False, case_sensitive=True, defaultfmt='f%i', unpack=None, + usemask=False, loose=True, invalid_raise=True) + +### loadtxt 的更多特性 + +对于这样一个文件: + + +```python +%%writefile myfile.txt + -- BEGINNING OF THE FILE +% Day, Month, Year, Skip, Power +01, 01, 2000, x876, 13 % wow! 
+% we don't want have Jan 03rd +04, 01, 2000, xfed, 55 +``` + + Overwriting myfile.txt + + + +```python +data = np.loadtxt('myfile.txt', + skiprows=1, #忽略第一行 + dtype=np.int, #数组类型 + delimiter=',', #逗号分割 + usecols=(0,1,2,4), #指定使用哪几列数据 + comments='%' #百分号为注释符 + ) +data +``` + + + + + array([[ 1, 1, 2000, 13], + [ 4, 1, 2000, 55]]) + + + +### loadtxt 自定义转换方法 + + +```python +%%writefile myfile.txt +2010-01-01 2.3 3.2 +2011-01-01 6.1 3.1 +``` + + Overwriting myfile.txt + + +假设我们的文本包含日期,我们可以使用 `datetime` 在 `loadtxt` 中处理: + + +```python +import datetime + +def date_converter(s): + return datetime.datetime.strptime(s, "%Y-%m-%d") + +data = np.loadtxt('myfile.txt', + dtype=np.object, #数据类型为对象 + converters={0:date_converter, #第一列使用自定义转换方法 + 1:float, #第二第三使用浮点数转换 + 2:float}) + +data +``` + + + + + array([[datetime.datetime(2010, 1, 1, 0, 0), 2.3, 3.2], + [datetime.datetime(2011, 1, 1, 0, 0), 6.1, 3.1]], dtype=object) + + + +移除 `myfile.txt`: + + +```python +import os +os.remove('myfile.txt') +``` + +### 读写各种格式的文件 + +如下表所示: + +文件格式|使用的包|函数 +----|----|---- +txt | numpy | loadtxt, genfromtxt, fromfile, savetxt, tofile +csv | csv | reader, writer +Matlab | scipy.io | loadmat, savemat +hdf | pytables, h5py| +NetCDF | netCDF4, scipy.io.netcdf | netCDF4.Dataset, scipy.io.netcdf.netcdf_file +**文件格式**|**使用的包**|**备注** +wav | scipy.io.wavfile | 音频文件 +jpeg,png,...| PIL, scipy.misc.pilutil | 图像文件 +fits | pyfits | 天文图像 + +此外, `pandas` ——一个用来处理时间序列的包中包含处理各种文件的方法,具体可参见它的文档: + +http://pandas.pydata.org/pandas-docs/stable/io.html + +## 将数组写入文件 + +`savetxt` 可以将数组写入文件,默认使用科学计数法的形式保存: + + +```python +data = np.array([[1,2], + [3,4]]) + +np.savetxt('out.txt', data) +``` + + +```python +with open('out.txt') as f: + for line in f: + print line, +``` + + 1.000000000000000000e+00 2.000000000000000000e+00 + 3.000000000000000000e+00 4.000000000000000000e+00 + + +也可以使用类似**C**语言中 `printf` 的方式指定输出的格式: + + +```python +data = np.array([[1,2], + [3,4]]) + +np.savetxt('out.txt', data, fmt="%d") #保存为整数 +``` + + 
+```python +with open('out.txt') as f: + for line in f: + print line, +``` + + 1 2 + 3 4 + + +逗号分隔的输出: + + +```python +data = np.array([[1,2], + [3,4]]) + +np.savetxt('out.txt', data, fmt="%.2f", delimiter=',') #保存为2位小数的浮点数,用逗号分隔 +``` + + +```python +with open('out.txt') as f: + for line in f: + print line, +``` + + 1.00,2.00 + 3.00,4.00 + + +复数值默认会加上括号: + + +```python +data = np.array([[1+1j,2], + [3,4]]) + +np.savetxt('out.txt', data, fmt="%.2f", delimiter=',') #保存为2位小数的浮点数,用逗号分隔 +``` + + +```python +with open('out.txt') as f: + for line in f: + print line, +``` + + (1.00+1.00j), (2.00+0.00j) + (3.00+0.00j), (4.00+0.00j) + + +更多参数: + + savetxt(fname, + X, + fmt='%.18e', + delimiter=' ', + newline='\n', + header='', + footer='', + comments='# ') + +移除 `out.txt`: + + +```python +import os +os.remove('out.txt') +``` + +## Numpy 二进制格式 + +数组可以储存成二进制格式,单个的数组保存为 `.npy` 格式,多个数组保存为多个`.npy`文件组成的 `.npz` 格式,每个 `.npy` 文件包含一个数组。 + +与文本格式不同,二进制格式保存了数组的 `shape, dtype` 信息,以便完全重构出保存的数组。 + +保存的方法: + +- `save(file, arr)` 保存单个数组,`.npy` 格式 +- `savez(file, *args, **kwds)` 保存多个数组,无压缩的 `.npz` 格式 +- `savez_compressed(file, *args, **kwds)` 保存多个数组,有压缩的 `.npz` 格式 + +读取的方法: + +- `load(file, mmap_mode=None)` 对于 `.npy`,返回保存的数组,对于 `.npz`,返回一个名称-数组对组成的字典。 + +### 单个数组的读写 + + +```python +a = np.array([[1.0,2.0], [3.0,4.0]]) + +fname = 'afile.npy' +np.save(fname, a) +``` + + +```python +aa = np.load(fname) +aa +``` + + + + + array([[ 1., 2.], + [ 3., 4.]]) + + + +删除生成的文件: + + +```python +import os +os.remove('afile.npy') +``` + +### 二进制与文本大小比较 + + +```python +a = np.arange(10000.) 
+``` + +保存为文本: + + +```python +np.savetxt('a.txt', a) +``` + +查看大小: + + +```python +import os +os.stat('a.txt').st_size +``` + + + + + 260000L + + + +保存为二进制: + + +```python +np.save('a.npy', a) +``` + +查看大小: + + +```python +os.stat('a.npy').st_size +``` + + + + + 80080L + + + +删除生成的文件: + + +```python +os.remove('a.npy') +os.remove('a.txt') +``` + +可以看到,二进制文件大约是文本文件的三分之一。 + +### 保存多个数组 + + +```python +a = np.array([[1.0,2.0], + [3.0,4.0]]) +b = np.arange(1000) +``` + +保存多个数组: + + +```python +np.savez('data.npz', a=a, b=b) +``` + +查看里面包含的文件: + + +```python +!unzip -l data.npz +``` + + Archive: data.npz + Length Date Time Name + --------- ---------- ----- ---- + 112 2015/08/10 00:46 a.npy + 4080 2015/08/10 00:46 b.npy + --------- ------- + 4192 2 files + + +载入数据: + + +```python +data = np.load('data.npz') +``` + +载入后可以像字典一样进行操作: + + +```python +data.keys() +``` + + + + + ['a', 'b'] + + + + +```python +data['a'] +``` + + + + + array([[ 1., 2.], + [ 3., 4.]]) + + + + +```python +data['b'].shape +``` + + + + + (1000L,) + + + +删除文件: + + +```python +# 要先删除 data,否则删除时会报错 +del data + +os.remove('data.npz') +``` + +### 压缩文件 + +当数据比较整齐时: + + +```python +a = np.arange(20000.) +``` + +无压缩大小: + + +```python +np.savez('a.npz', a=a) +os.stat('a.npz').st_size +``` + + + + + 160188L + + + +有压缩大小: + + +```python +np.savez_compressed('a2.npz', a=a) +os.stat('a2.npz').st_size +``` + + + + + 26885L + + + +大约有 6x 的压缩效果。 + +当数据比较混乱时: + + +```python +a = np.random.rand(20000.) 
+``` + +无压缩大小: + + +```python +np.savez('a.npz', a=a) +os.stat('a.npz').st_size +``` + + + + + 160188L + + + +有压缩大小: + + +```python +np.savez_compressed('a2.npz', a=a) +os.stat('a2.npz').st_size +``` + + + + + 151105L + + + +只有大约 1.06x 的压缩效果。 + + +```python +os.remove('a.npz') +os.remove('a2.npz') +``` diff --git a/docs/03-numpy/03.20-structured-arrays.md b/docs/03-numpy/03.20-structured-arrays.md new file mode 100644 index 00000000..b780ed28 --- /dev/null +++ b/docs/03-numpy/03.20-structured-arrays.md @@ -0,0 +1,431 @@ + +# 结构化数组 + +假设我们要保存这样的数据: + +|name|age|wgt +--|--|--|-- +0|dan|1|23.1 +1|ann|0|25.1 +2|sam|2|8.3 + +希望定义一个一维数组,每个元素有三个属性 `name, age, wgt`,此时我们需要使用结构化数组。 + + +```python +import numpy as np +``` + +定义数组 `a`: + +0|1|2|3 +-|-|-|- +1.0|2.0|3.0|4.0 + + +```python +a = np.array([1.0,2.0,3.0,4.0], np.float32) +``` + +使用 `view` 方法,将 `a` 对应的内存按照复数来解释: + + +```python +a.view(np.complex64) +``` + + + + + array([ 1.+2.j, 3.+4.j], dtype=complex64) + + + +0|1|2|3 +-|-|-|- +1.0|2.0|3.0|4.0 +real|imag|real|imag + +事实上,我们可以把复数看成一个结构体,第一部分是实部,第二部分是虚部,这样这个数组便可以看成是一个结构化数组。 + +换句话说,我们只需要换种方式解释这段内存,便可以得到结构化数组的效果! + +0|1|2|3 +-|-|-|- +1.0|2.0|3.0|4.0 +mass|vol|mass|vol + +例如,我们可以将第一个浮点数解释为质量,第二个浮点数解释为速度,则这段内存还可以看成是包含两个域(质量和速度)的结构体。 + + +```python +my_dtype = np.dtype([('mass', 'float32'), ('vol', 'float32')]) +``` + + +```python +a.view(my_dtype) +``` + + + + + array([(1.0, 2.0), (3.0, 4.0)], + dtype=[('mass', ' + position + mass + xy + + +那么它的类型可以这样嵌套定义: + + +```python +particle_dtype = np.dtype([('position', [('x', 'float'), + ('y', 'float')]), + ('mass', 'float') + ]) +``` + +假设数据文件如下: + + +```python +%%writefile data.txt +2.0 3.0 42.0 +2.1 4.3 32.5 +1.2 4.6 32.3 +4.5 -6.4 23.3 +``` + + Overwriting data.txt + + +读取数据: + + +```python +data = np.loadtxt('data.txt', dtype=particle_dtype) +``` + + +```python +data +``` + + + + + array([((2.0, 3.0), 42.0), ((2.1, 4.3), 32.5), ((1.2, 4.6), 32.3), + ((4.5, -6.4), 23.3)], + dtype=[('position', [('x', '切片返回复制 | 采用引用传递的方式进行计算
切片返回引用 +文件名必须和函数名相同 | 函数可以在任何地方任何文件中定义 +收费 | 免费 +2D,3D图像支持 | 依赖第三方库如 `matplotlib` 等 +完全的编译环境 | 依赖于 **Python** 提供的编译环境 + +## array 还是 matrix? + +`Numpy` 中不仅提供了 `array` 这个基本类型,还提供了支持矩阵操作的类 `matrix`,但是一般推荐使用 `array`: + +- 很多 `numpy` 函数返回的是 `array`,不是 `matrix` +- 在 `array` 中,逐元素操作和矩阵操作有着明显的不同 +- 向量可以不被视为矩阵 + +具体说来: + +- `*, dot(), multiply()` + - `array`:`*` -逐元素乘法,`dot()` -矩阵乘法 + - `matrix`:`*` -矩阵乘法,`multiply()` -逐元素乘法 +- 处理向量 + - `array`:形状为 `1xN, Nx1, N` 的向量的意义是不同的,类似于 `A[:,1]` 的操作返回的是一维数组,形状为 `N`,一维数组的转置仍是自己本身 + - `matrix`:形状为 `1xN, Nx1`,`A[:,1]` 返回的是二维 `Nx1` 矩阵 +- 高维数组 + - `array`:支持大于2的维度 + - `matrix`:维度只能为2 +- 属性 + - `array`:`.T` 表示转置 + - `matrix`:`.H` 表示复共轭转置,`.I` 表示逆,`.A` 表示转化为 `array` 类型 +- 构造函数 + - `array`:`array` 函数接受一个(嵌套)序列作为参数——`array([[1,2,3],[4,5,6]])` + - `matrix`:`matrix` 函数额外支持字符串参数——`matrix("[1 2 3; 4 5 6]")` + +其优缺点各自如下: + +- **`array`** + - `[GOOD]` 一维数组既可以看成列向量,也可以看成行向量。`v` 在 `dot(A,v)` 被看成列向量,在 `dot(v,A)` 中被看成行向量,这样省去了转置的麻烦 + - `[BAD!]` 矩阵乘法需要使用 `dot()` 函数,如: `dot(dot(A,B),C)` vs `A*B*C` + - `[GOOD]` 逐元素乘法很简单: `A*B` + - `[GOOD]` 作为基本类型,是很多基于 `numpy` 的第三方库函数的返回类型 + - `[GOOD]` 所有的操作 `*,/,+,**,...` 都是逐元素的 + - `[GOOD]` 可以处理任意维度的数据 + - `[GOOD]` 张量运算 + +- **`matrix`** + - `[GOOD]` 类似与 **`MATLAB`** 的操作 + - `[BAD!]` 最高维度为2 + - `[BAD!]` 最低维度也为2 + - `[BAD!]` 很多函数返回的是 `array`,即使传入的参数是 `matrix` + - `[GOOD]` `A*B` 是矩阵乘法 + - `[BAD!]` 逐元素乘法需要调用 `multiply` 函数 + - `[BAD!]` `/` 是逐元素操作 + +当然在实际使用中,二者的使用取决于具体情况。 + +二者可以互相转化: + +- `asarray` :返回数组 +- `asmatrix`(或者`mat`) :返回矩阵 +- `asanyarray` :返回数组或者数组的子类,注意到矩阵是数组的一个子类,所以输入是矩阵的时候返回的也是矩阵 + +## 类 Matlab 函数 + +有很多类似的函数: + +- `ones, zeros, empty, eye, rand, repmat` + +通常这些函数的返回值是 `array`,不过 `numpy` 提供了一个 `matlib` 的子模块,子模块中的这些函数返回值为 `matrix`: + + +```python +import numpy +import numpy.matlib +``` + + +```python +a = numpy.ones(7) + +print a.shape +print type(a) +``` + + (7L,) + + + + +```python +a = numpy.matlib.ones(7) + +print a.shape +print type(a) +``` + + (1L, 7L) + + + +`mat` 函数将一个数组转化为矩阵: + + +```python +a = 
numpy.array([1,2,3]) + +b = numpy.mat(a) + +print type(b) +``` + + + + +有些函数被放到子模块中了,例如调用 `rand()` 函数需要使用 `numpy.random.rand()` (或者从 `matlib` 模块中生成矩阵): + + +```python +a = numpy.random.rand(10) +print a +``` + + [ 0.66007267 0.34794294 0.5040946 0.65044648 0.74763248 0.42486999 + 0.90922612 0.69071747 0.33541076 0.08570178] + + +## 等效操作 + +假定我们已经这样导入了 `Numpy`: + + +```python +from numpy import * +import scipy.linalg +``` + +以下 `linalg` 表示的是 `numpy.linalg`,与 `scipy.linalg` 不同。 + +注意:**`MATLAB`** 与 **`Numpy`** 下标之间有这样几处不同: +- `1-base` vs `0-base` +- `()` vs `[]` +- `MATLAB`:`beg(:step):end`,包含结束值 `end` +- `Numpy`:`beg:end(:step)`,不包含结束值 `end` + +MATLAB|Numpy|注释 +---|---|--- +`help func` | `info(func)`, `help(func)`, `func?`(IPython)| 查看函数帮助 +`which func` | | 查看函数在什么地方定义 +`type func` | `source(func)`, `func??`(IPython)| 查看函数源代码 +`a && b` | `a and b` | 逻辑 `AND` +`1*i, 1*j, 1i, 1j` | `1j` | 复数 +`eps` | `spacing(1)` | `1` 与最近浮点数的距离 +`ndims(a)` | `ndim(a), a.ndim` | `a` 的维数 +`numel(a)` | `size(a), a.size` | `a` 的元素个数 +`size(a)` | `shape(a), a.shape` | `a` 的形状 +`size(a,n)` | `a.shape[n-1]` | 第 n 维的大小 +`a(2,5)` | `a[1,4]` | 第 2 行第 5 列元素 +`a(2,:)` | `a[1], a[1,:]` | 第 2 行 +`a(1:5,:)` | `a[0:5]` | 第 1 至 5 行 +`a(end-4:end,:)` | `a[-5:]` | 后 5 行 +`a(1:3,5:9)` | `a[0:3][:,4:9]` | 特定行列(1~3 行,5~9 列) +`a([2,4,5],[1,3])` | `a[ix_([1,3,4],[0,2])]` | 特定行列(2,4,5 行的 1,3 列) +`a(3:2:21,:)` | `a[2:21:2,:]` | 特定行列(3,5,...,21 行) +`a(1:2:end,:)` | `a[ ::2,:]` | 奇数行 +`a([1:end 1],:)` | `a[r_[:len(a),0]]` | 将第一行添加到末尾 +`a.'` | `a.T` | 转置 +`a ./ b` | `a/b` | 逐元素除法 +`(a>0.5)` | `(a>0.5)` | 各个元素是否大于 0.5 +`find(a>0.5)` | `nonzero(a>0.5)` | 大于 0.5 的位置 +`a(a<0.5)=0` | `a[a<0.5]=0` | 小于 0.5 的设为 0 +`a(:) = 3` | `a[:] = 3` | 所有元素设为 3 +`y=x` | `y=x.copy()` | 将 y 设为 x +`y=x(2,:)` | `y=x[1,:].copy()` | 注意值传递和引用传递的区别 +`y=x(:)` | `y=x.flatten(1)` | 将矩阵变为一个向量,这里 `1` 表示沿着列进行转化 +`max(max(a))` | `a.max()` | 最大值 +`max(a)` | `a.max(0)` | 每一列的最大值 +`max(a,[],2)` | `a.max(1)` | 每一行的最大值 +`max(a,b)` | `maximum(a,b)` | 
逐元素比较,取较大的值 +`a & b` | `logical_and(a, b)` | 逻辑 AND +`bitand(a, b)` | `a & b` | 逐比特 AND +`inv(a)` | `linalg.inv(a)` | a 的逆 +`pinv(a)` | `linalg.pinv(a)` | 伪逆 +`rank(a)` | `linalg.matrix_rank(a)` | 秩 +`a\b` | `linalg.solve(a,b)(如果a是方阵),linalg.lstsq(a,b)` | 解 `a x = b` +`b/a` | 求解 `a.T x.T = b.T` | 解 `x a = b` +`[U,S,V]=svd(a)` | `U, S, Vh = linalg.svd(a), V = Vh.T` | 奇异值分解 +`chol(a)` | `linalg.cholesky(a).T` | Cholesky 分解 +`[V,D]=eig(a)` | `D,V = linalg.eig(a)` | 特征值分解 +`[V,D]=eig(a,b)` | `D,V = scipy.linalg.eig(a,b)` | +`[V,D]=eigs(a,k)` | | 前 k 大特征值对应的特征向量 +`` | `` | +`` | `` | +`` | `` | +`` | `` | + +MATLAB|numpy.array|numpy.matrix|注释 +---|---|---|--- +`[1,2,3;4,5,6]` | `array([[1.,2.,3.],[4.,5.,6.]])` | `mat([[1.,2.,3.],[4.,5.,6.]]), mat('1,2,3;4,5,6')` | `2x3` 矩阵 +`[a b;c d]` | `vstack([hstack([a,b]), hstack([c,d])])` | `bmat('a b;c d')` | 分块矩阵构造 +`a(end)` | `a[-1]` | `a[:,-1][0,0]` | 最后一个元素 +`a'` | `a.conj().T` | `a.H` | 复共轭转置 +`a * b` | `dot(a,b)` | `a * b` | 矩阵乘法 +`a .* b` | `a * b` | `multiply(a,b)` | 逐元素乘法 +`a.^3` | `a**3` | `power(a,3)` | 逐元素立方 +`a(:,find(v>0.5))` | `a[:,nonzero(v>0.5)[0]]` | `a[:,nonzero(v.A>0.5)[0]]` | 找出行向量 `v>0.5` 对应的 `a` 中的列 +`a(:,find(v>0.5))` | `a[:,v.T>0.5]` | `a[:,v.T>0.5]` | 找出列向量 `v>0.5` 对应的 `a` 中的列 +`a .* (a>0.5)` | `a * (a>0.5)` | `mat(a.A * (a>0.5).A)` | 将所有小于 0.5 的元素设为 0 +`1:10` | `arange(1.,11.), r_[1.:11.], r_[1:10:10j]` | `mat(arange(1.,11.)), r_[1.:11., 'r']` | 这里 `1.` 是为了将其转化为浮点数组 +`0:9` | `arange(10.), r_[:10.], r_[:9:10j]` | `mat(arange(10.)), r_[:10., 'r']` | +`[1:10]'` | `arange(1.,11.)[:,newaxis]` | `r_[1.:11.,'c']` | 列向量 +`zeros, ones, eye, diag, linspace` | `zeros, ones, eye, diag, linspace` | `mat(...)` | +`rand(3,4)` | `random.rand(3,4)` | `mat(...)` | 0~1 随机数 +`[x,y]=meshgrid(0:8,0:5)` | `mgrid[0:9., 0:6.], meshgrid(r_[0:9.],r_[0:6.])` | `mat(...)` | 网格 +| `ogrid[0:9.,0:6.], ix_(r_[0:9.],r_[0:6.])` | `mat()` | 建议在 `Numpy` 中使用 +`[x,y]=meshgrid([1,2,4],[2,4,5])`|`meshgrid([1,2,4],[2,4,5])`|`mat(...)`| 
+|`ix_([1,2,4],[2,4,5])`|`mat(...)`| +`repmat(a, m, n)`|`tile(a, (m,n))`|`mat(...)`| 产生 `m x n` 个 `a` +`[a b]` | `c_[a,b]`|`concatenate((a,b),1)`| 列对齐连接 +`[a; b]` | `r_[a,b]`|`concatenate((a,b))`| 行对齐连接 +`norm(v)` | `sqrt(dot(v,v)), linalg.norm(v)` | `sqrt(dot(v.A,v.A)), linalg.norm(v)` | 模 +`[Q,R,P]=qr(a,0)` | `Q,R = scipy.linalg.qr(a)` | `mat(...)` | QR 分解 +`[L,U,P]=lu(a)` | `P,L,U = scipy.linalg.lu(a)` | `mat(...)` | LU 分解 +`fft(a)` | `fft(a)` | `mat(...)` | FFT +`ifft(a)` | `ifft(a)` | `mat(...)` | IFFT +`sort(a)` | `sort(a),a.sort` | `mat(...)` | 排序 + +参考:http://wiki.scipy.org/NumPy_for_Matlab_Users#whichNotes diff --git a/docs/04-scipy/04.01-scienticfic-python-overview.md b/docs/04-scipy/04.01-scienticfic-python-overview.md new file mode 100644 index 00000000..b2dfafde --- /dev/null +++ b/docs/04-scipy/04.01-scienticfic-python-overview.md @@ -0,0 +1,184 @@ + +# SCIentific PYthon 简介 + +**`Ipython`** 提供了一个很好的解释器界面。 + +**`Matplotlib`** 提供了一个类似 **`Matlab`** 的画图工具。 + +**`Numpy`** 提供了 `ndarray` 对象,可以进行快速的向量化计算。 + +**`Scipy`** 是 **`Python`** 中进行科学计算的一个第三方库,以 **`Numpy`** 为基础。 + +**`Pandas`** 是处理时间序列数据的第三方库,提供一个类似 **`R`** 语言的环境。 + +**`StatsModels`** 是一个统计库,着重于统计模型。 + +**`Scikits`** 以 **`Scipy`** 为基础,提供如 **`scikits-learn` 机器学习**和**`scikits-image` 图像处理**等高级用法。 + +## Scipy + +**`Scipy`** 由不同科学计算领域的子模块组成: + +子模块|描述 +----|---- +`cluster`| 聚类算法 +`constants`| 物理数学常数 +`fftpack`| 快速傅里叶变换 +`integrate`| 积分和常微分方程求解 +`interpolate`| 插值 +`io`| 输入输出 +`linalg`| 线性代数 +`odr`| 正交距离回归 +`optimize`| 优化和求根 +`signal`| 信号处理 +`sparse`| 稀疏矩阵 +`spatial`| 空间数据结构和算法 +`special`| 特殊函数 +`stats`| 统计分布和函数 +`weave`| C/C++ 集成 + +在使用 **`Scipy`** 之前,为了方便,假定这些基础的模块已经被导入: + + +```python +import numpy as np +import scipy as sp +import matplotlib as mpl +import matplotlib.pyplot as plt +``` + +使用 **Scipy** 中的子模块时,需要分别导入: + + +```python +from scipy import linalg, optimize +``` + +对于一些常用的函数,这些在子模块中的函数可以在 `scipy` 命名空间中调用。另一方面,由于 **`Scipy`** 以 **`Numpy`** 为基础,因此很多基础的 **`Numpy`** 函数可以在`scipy` 命名空间中直接调用。 + +我们可以使用 
`numpy` 中的 `info` 函数来查看函数的文档: + + +```python +np.info(optimize.fmin) +``` + + fmin(func, x0, args=(), xtol=0.0001, ftol=0.0001, maxiter=None, maxfun=None, + full_output=0, disp=1, retall=0, callback=None) + + Minimize a function using the downhill simplex algorithm. + + This algorithm only uses function values, not derivatives or second + derivatives. + + Parameters + ---------- + func : callable func(x,*args) + The objective function to be minimized. + x0 : ndarray + Initial guess. + args : tuple, optional + Extra arguments passed to func, i.e. ``f(x,*args)``. + callback : callable, optional + Called after each iteration, as callback(xk), where xk is the + current parameter vector. + xtol : float, optional + Relative error in xopt acceptable for convergence. + ftol : number, optional + Relative error in func(xopt) acceptable for convergence. + maxiter : int, optional + Maximum number of iterations to perform. + maxfun : number, optional + Maximum number of function evaluations to make. + full_output : bool, optional + Set to True if fopt and warnflag outputs are desired. + disp : bool, optional + Set to True to print convergence messages. + retall : bool, optional + Set to True to return list of solutions at each iteration. + + Returns + ------- + xopt : ndarray + Parameter that minimizes function. + fopt : float + Value of function at minimum: ``fopt = func(xopt)``. + iter : int + Number of iterations performed. + funcalls : int + Number of function calls made. + warnflag : int + 1 : Maximum number of function evaluations made. + 2 : Maximum number of iterations reached. + allvecs : list + Solution at each iteration. + + See also + -------- + minimize: Interface to minimization algorithms for multivariate + functions. See the 'Nelder-Mead' `method` in particular. + + Notes + ----- + Uses a Nelder-Mead simplex algorithm to find the minimum of function of + one or more variables. + + This algorithm has a long history of successful use in applications. 
+ But it will usually be slower than an algorithm that uses first or + second derivative information. In practice it can have poor + performance in high-dimensional problems and is not robust to + minimizing complicated functions. Additionally, there currently is no + complete theory describing when the algorithm will successfully + converge to the minimum, or how fast it will if it does. + + References + ---------- + .. [1] Nelder, J.A. and Mead, R. (1965), "A simplex method for function + minimization", The Computer Journal, 7, pp. 308-313 + + .. [2] Wright, M.H. (1996), "Direct Search Methods: Once Scorned, Now + Respectable", in Numerical Analysis 1995, Proceedings of the + 1995 Dundee Biennial Conference in Numerical Analysis, D.F. + Griffiths and G.A. Watson (Eds.), Addison Wesley Longman, + Harlow, UK, pp. 191-208. + + +可以用 `lookfor` 来查询特定关键词相关的函数: + + +```python +np.lookfor("resize array") +``` + + Search results for 'resize array' + --------------------------------- + numpy.chararray.resize + Change shape and size of array in-place. + numpy.ma.resize + Return a new masked array with the specified size and shape. + numpy.oldnumeric.ma.resize + The original array's total size can be any size. + numpy.resize + Return a new array with the specified shape. + numpy.chararray + chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0, + numpy.memmap + Create a memory-map to an array stored in a *binary* file on disk. + numpy.ma.mvoid.resize + .. warning:: + + +还可以指定查找的模块: + + +```python +np.lookfor("remove path", module="os") +``` + + Search results for 'remove path' + -------------------------------- + os.removedirs + removedirs(path) + os.walk + Directory tree generator. 
+ diff --git a/docs/04-scipy/04.02-interpolation-with-scipy.md b/docs/04-scipy/04.02-interpolation-with-scipy.md new file mode 100644 index 00000000..6e7901e2 --- /dev/null +++ b/docs/04-scipy/04.02-interpolation-with-scipy.md @@ -0,0 +1,478 @@ + +# 插值 + + +```python +import numpy as np +import matplotlib.pyplot as plt +%matplotlib inline +``` + +设置 **`Numpy`** 浮点数显示格式: + + +```python +np.set_printoptions(precision=2, suppress=True) +``` + +从文本中读入数据,数据来自 http://kinetics.nist.gov/janaf/html/C-067.txt ,保存为结构体数组: + + +```python +data = np.genfromtxt("JANAF_CH4.txt", + delimiter="\t", # TAB 分隔 + skiprows=1, # 忽略首行 + names=True, # 读入属性 + missing_values="INFINITE", # 缺失值 + filling_values=np.inf) # 填充缺失值 +``` + +显示部分数据: + + +```python +for row in data[:7]: + print "{}\t{}".format(row['TK'], row['Cp']) +print "...\t..." +``` + + 0.0 0.0 + 100.0 33.258 + 200.0 33.473 + 250.0 34.216 + 298.15 35.639 + 300.0 35.708 + 350.0 37.874 + ... ... + + +绘图: + + +```python +p = plt.plot(data['TK'], data['Cp'], 'kx') +t = plt.title("JANAF data for Methane $CH_4$") +a = plt.axis([0, 6000, 30, 120]) +x = plt.xlabel("Temperature (K)") +y = plt.ylabel(r"$C_p$ ($\frac{kJ}{kg K}$)") +``` + + +![png](output_9_0.png) + + +## 插值 + +假设我们要对这组数据进行插值。 + +先导入一维插值函数 `interp1d`: + + interp1d(x, y) + + +```python +from scipy.interpolate import interp1d +``` + + +```python +ch4_cp = interp1d(data['TK'], data['Cp']) +``` + +`interp1d` 的返回值可以像函数一样接受输入,并返回插值的结果。 + +单个输入值,注意返回的是数组: + + +```python +ch4_cp(382.2) +``` + + + + + array(39.565144000000004) + + + +输入数组,返回的是对应的数组: + + +```python +ch4_cp([32.2,323.2]) +``` + + + + + array([ 10.71, 36.71]) + + + +默认情况下,输入值要在插值允许的范围内,否则插值会报错: + + +```python +ch4_cp(8752) +``` + + + --------------------------------------------------------------------------- + + ValueError Traceback (most recent call last) + + in () + ----> 1 ch4_cp(8752) + + + d:\Miniconda\lib\site-packages\scipy\interpolate\polyint.pyc in __call__(self, x) + 77 """ + 78 x, x_shape = self._prepare_x(x) 
+ ---> 79 y = self._evaluate(x) + 80 return self._finish_y(y, x_shape) + 81 + + + d:\Miniconda\lib\site-packages\scipy\interpolate\interpolate.pyc in _evaluate(self, x_new) + 496 # The behavior is set by the bounds_error variable. + 497 x_new = asarray(x_new) + --> 498 out_of_bounds = self._check_bounds(x_new) + 499 y_new = self._call(self, x_new) + 500 if len(y_new) > 0: + + + d:\Miniconda\lib\site-packages\scipy\interpolate\interpolate.pyc in _check_bounds(self, x_new) + 526 "range.") + 527 if self.bounds_error and above_bounds.any(): + --> 528 raise ValueError("A value in x_new is above the interpolation " + 529 "range.") + 530 + + + ValueError: A value in x_new is above the interpolation range. + + +但我们可以通过参数设置允许超出范围的值存在: + + +```python +ch4_cp = interp1d(data['TK'], data['Cp'], + bounds_error=False) +``` + +不过由于超出范围,所以插值的输出是非法值: + + +```python +ch4_cp(8752) +``` + + + + + array(nan) + + + +可以使用指定值替代这些非法值: + + +```python +ch4_cp = interp1d(data['TK'], data['Cp'], + bounds_error=False, fill_value=-999.25) +``` + + +```python +ch4_cp(8752) +``` + + + + + array(-999.25) + + + +### 线性插值 + +`interp1d` 默认的插值方法是线性,关于线性插值的定义,请参见: + +- 维基百科-线性插值: https://zh.wikipedia.org/wiki/%E7%BA%BF%E6%80%A7%E6%8F%92%E5%80%BC +- 百度百科-线性插值: http://baike.baidu.com/view/4685624.htm + +其基本思想是,已知相邻两点 $x_1,x_2$ 对应的值 $y_1,y_2$ ,那么对于 $(x_1,x_2)$ 之间的某一点 $x$ ,线性插值对应的值 $y$ 满足:点 $(x,y)$ 在 $(x_1,y_1),(x_2,y_2)$ 所形成的线段上。 + +应用线性插值: + + +```python +T = np.arange(100,355,5) +plt.plot(T, ch4_cp(T), "+k") +p = plt.plot(data['TK'][1:7], data['Cp'][1:7], 'ro', markersize=8) +``` + + +![png](output_29_0.png) + + +其中红色的圆点为原来的数据点,黑色的十字点为对应的插值点,可以明显看到,相邻的数据点的插值在一条直线上。 + +### 多项式插值 + +我们可以通过 `kind` 参数来调节使用的插值方法,来得到不同的结果: + +- `nearest` 最近邻插值 +- `zero` 0阶插值 +- `linear` 线性插值 +- `quadratic` 二次插值 +- `cubic` 三次插值 +- `4,5,6,7` 更高阶插值 + +最近邻插值: + + +```python +cp_ch4 = interp1d(data['TK'], data['Cp'], kind="nearest") +p = plt.plot(T, cp_ch4(T), "k+") +p = plt.plot(data['TK'][1:7], data['Cp'][1:7], 'ro', 
markersize=8) +``` + + +![png](output_33_0.png) + + +0阶插值: + + +```python +cp_ch4 = interp1d(data['TK'], data['Cp'], kind="zero") +p = plt.plot(T, cp_ch4(T), "k+") +p = plt.plot(data['TK'][1:7], data['Cp'][1:7], 'ro', markersize=8) +``` + + +![png](output_35_0.png) + + +二次插值: + + +```python +cp_ch4 = interp1d(data['TK'], data['Cp'], kind="quadratic") +p = plt.plot(T, cp_ch4(T), "k+") +p = plt.plot(data['TK'][1:7], data['Cp'][1:7], 'ro', markersize=8) +``` + + +![png](output_37_0.png) + + +三次插值: + + +```python +cp_ch4 = interp1d(data['TK'], data['Cp'], kind="cubic") +p = plt.plot(T, cp_ch4(T), "k+") +p = plt.plot(data['TK'][1:7], data['Cp'][1:7], 'ro', markersize=8) +``` + + +![png](output_39_0.png) + + +事实上,我们可以使用更高阶的多项式插值,只要将 `kind` 设为对应的数字即可: + +四次多项式插值: + + +```python +cp_ch4 = interp1d(data['TK'], data['Cp'], kind=4) +p = plt.plot(T, cp_ch4(T), "k+") +p = plt.plot(data['TK'][1:7], data['Cp'][1:7], 'ro', markersize=8) +``` + + +![png](output_42_0.png) + + +可以参见: + +- 维基百科-多项式插值:https://zh.wikipedia.org/wiki/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8F%92%E5%80%BC +- 百度百科-插值法:http://baike.baidu.com/view/754506.htm + +对于二维乃至更高维度的多项式插值: + + +```python +from scipy.interpolate import interp2d, interpnd +``` + +其使用方法与一维类似。 + +### 径向基函数 + +关于径向基函数,可以参阅: +- 维基百科-Radial basis function:https://en.wikipedia.org/wiki/Radial_basis_function + +径向基函数,简单来说就是点 $x$ 处的函数值只依赖于 $x$ 与某点 $c$ 的距离: + +$$\Phi(x,c) = \Phi(\|x-c\|)$$ + + +```python +x = np.linspace(-3,3,100) +``` + +常用的径向基(`RBF`)函数有: + +高斯函数: + + +```python +plt.plot(x, np.exp(-1 * x **2)) +t = plt.title("Gaussian") +``` + + +![png](output_50_0.png) + + +`Multiquadric` 函数: + + +```python +plt.plot(x, np.sqrt(1 + x **2)) +t = plt.title("Multiquadric") +``` + + +![png](output_52_0.png) + + +`Inverse Multiquadric` 函数: + + +```python +plt.plot(x, 1. 
/ np.sqrt(1 + x **2)) +t = plt.title("Inverse Multiquadric") +``` + + +![png](output_54_0.png) + + +### 径向基函数插值 + +对于径向基函数,其插值的公式为: + +$$ +f(x) = \sum_j n_j \Phi(\|x-x_j\|) +$$ + +我们通过数据点 $x_j$ 来计算出 $n_j$ 的值,进而计算 $x$ 处的插值结果。 + + +```python +from scipy.interpolate.rbf import Rbf +``` + +使用 `multiquadric` 核: + + +```python +cp_rbf = Rbf(data['TK'], data['Cp'], function = "multiquadric") +plt.plot(data['TK'], data['Cp'], 'k+') +p = plt.plot(data['TK'], cp_rbf(data['TK']), 'r-') +``` + + +![png](output_59_0.png) + + +使用 `gaussian` 核: + + +```python +cp_rbf = Rbf(data['TK'], data['Cp'], function = "gaussian") +plt.plot(data['TK'], data['Cp'], 'k+') +p = plt.plot(data['TK'], cp_rbf(data['TK']), 'r-') +``` + + +![png](output_61_0.png) + + +使用 `inverse_multiquadric` 核: + + +```python +cp_rbf = Rbf(data['TK'], data['Cp'], function = "inverse_multiquadric") +plt.plot(data['TK'], data['Cp'], 'k+') +p = plt.plot(data['TK'], cp_rbf(data['TK']), 'r-') +``` + + +![png](output_63_0.png) + + +不同的 `RBF` 核的结果也不同。 + +### 高维 `RBF` 插值 + + +```python +from mpl_toolkits.mplot3d import Axes3D +``` + +三维数据点: + + +```python +x, y = np.mgrid[-np.pi/2:np.pi/2:5j, -np.pi/2:np.pi/2:5j] +z = np.cos(np.sqrt(x**2 + y**2)) +``` + + +```python +fig = plt.figure(figsize=(12,6)) +ax = fig.gca(projection="3d") +ax.scatter(x,y,z) +``` + + + + + + + + + +![png](output_69_1.png) + + +3维 `RBF` 插值: + + +```python +zz = Rbf(x, y, z) +``` + + +```python +xx, yy = np.mgrid[-np.pi/2:np.pi/2:50j, -np.pi/2:np.pi/2:50j] +fig = plt.figure(figsize=(12,6)) +ax = fig.gca(projection="3d") +ax.plot_surface(xx,yy,zz(xx,yy),rstride=1, cstride=1, cmap=plt.cm.jet) +``` + + + + + + + + + +![png](output_72_1.png) + diff --git a/docs/04-scipy/04.03-statistics-with-scipy.md b/docs/04-scipy/04.03-statistics-with-scipy.md new file mode 100644 index 00000000..88d41309 --- /dev/null +++ b/docs/04-scipy/04.03-statistics-with-scipy.md @@ -0,0 +1,644 @@ + +# 概率统计方法 + +## 简介 + +**`Python`** 中常用的统计工具有 **`Numpy, Pandas, PyMC, 
StatsModels`** 等。 + +**`Scipy`** 中的子库 `scipy.stats` 中包含很多统计上的方法。 + +导入 `numpy` 和 `matplotlib`: + + +```python +%pylab inline +``` + + Populating the interactive namespace from numpy and matplotlib + + + +```python +heights = array([1.46, 1.79, 2.01, 1.75, 1.56, 1.69, 1.88, 1.76, 1.88, 1.78]) +``` + +`Numpy` 自带简单的统计方法: + + +```python +print 'mean, ', heights.mean() +print 'min, ', heights.min() +print 'max, ', heights.max() +print 'standard deviation, ', heights.std() +``` + + mean, 1.756 + min, 1.46 + max, 2.01 + standard deviation, 0.150811140172 + + +导入 **`Scipy`** 的统计模块: + + +```python +import scipy.stats.stats as st +``` + +其他统计量: + + +```python +print 'median, ', st.nanmedian(heights) # 忽略nan值之后的中位数 +print 'mode, ', st.mode(heights) # 众数及其出现次数 +print 'skewness, ', st.skew(heights) # 偏度 +print 'kurtosis, ', st.kurtosis(heights) # 峰度 +print 'and so many more...' +``` + + median, 1.77 + mode, (array([ 1.88]), array([ 2.])) + skewness, -0.393524456473 + kurtosis, -0.330672097724 + and so many more... + + +## 概率分布 + +常见的[连续概率分布](https://zh.wikipedia.org/wiki/Category:%E8%BF%9E%E7%BB%AD%E5%88%86%E5%B8%83)有: + +- 均匀分布 +- 正态分布 +- 学生`t`分布 +- `F`分布 +- `Gamma`分布 +- ... + +[离散概率分布](https://zh.wikipedia.org/wiki/Category:%E7%A6%BB%E6%95%A3%E5%88%86%E5%B8%83): + +- 伯努利分布 +- 几何分布 +- ... 
+ +这些都可以在 `scipy.stats` 中找到。 + +## 连续分布 + +### 正态分布 + +以[正态分布](https://zh.wikipedia.org/wiki/%E6%AD%A3%E6%80%81%E5%88%86%E5%B8%83)为例,先导入正态分布: + + +```python +from scipy.stats import norm +``` + +它包含四类常用的函数: + +- `norm.cdf` 返回对应的[累计分布函数](https://zh.wikipedia.org/wiki/%E7%B4%AF%E7%A7%AF%E5%88%86%E5%B8%83%E5%87%BD%E6%95%B0)值 +- `norm.pdf` 返回对应的[概率密度函数](https://zh.wikipedia.org/wiki/%E6%A9%9F%E7%8E%87%E5%AF%86%E5%BA%A6%E5%87%BD%E6%95%B8)值 +- `norm.rvs` 产生指定参数的随机变量 +- `norm.fit` 返回给定数据下,各参数的[最大似然估计](https://zh.wikipedia.org/wiki/%E6%9C%80%E5%A4%A7%E4%BC%BC%E7%84%B6%E4%BC%B0%E8%AE%A1)(MLE)值 + +从正态分布产生500个随机点: + + +```python +x_norm = norm.rvs(size=500) +type(x_norm) +``` + + + + + numpy.ndarray + + + +直方图: + + +```python +h = hist(x_norm) +print 'counts, ', h[0] +print 'bin centers', h[1] +``` + + counts, [ 7. 21. 42. 97. 120. 91. 64. 38. 17. 3.] + bin centers [-2.68067801 -2.13266147 -1.58464494 -1.0366284 -0.48861186 0.05940467 + 0.60742121 1.15543774 1.70345428 2.25147082 2.79948735] + + + +![png](output_20_1.png) + + +归一化直方图(用出现频率代替次数),将划分区间变为 `20`(默认 `10`): + + +```python +h = hist(x_norm, normed=True, bins=20) +``` + + +![png](output_22_0.png) + + +在这组数据下,正态分布参数的最大似然估计值为: + + +```python +x_mean, x_std = norm.fit(x_norm) + +print 'mean, ', x_mean +print 'x_std, ', x_std +``` + + mean, -0.0426135499965 + x_std, 0.950754110144 + + +将真实的概率密度函数与直方图进行比较: + + +```python +h = hist(x_norm, normed=True, bins=20) + +x = linspace(-3,3,50) +p = plot(x, norm.pdf(x), 'r-') +``` + + +![png](output_26_0.png) + + +导入积分函数: + + +```python +from scipy.integrate import trapz +``` + +通过积分,计算落在某个区间的概率大小: + + +```python +x1 = linspace(-2,2,108) +p = trapz(norm.pdf(x1), x1) +print '{:.2%} of the values lie between -2 and 2'.format(p) + +fill_between(x1, norm.pdf(x1), color = 'red') +plot(x, norm.pdf(x), 'k-') +``` + + 95.45% of the values lie between -2 and 2 + + + + + + [] + + + + +![png](output_30_2.png) + + +默认情况,正态分布的参数为均值0,标准差1,即标准正态分布。 + +可以通过 `loc` 和 `scale` 来调整这些参数,一种方法是调用相关函数时进行输入: 
+ + +```python +p = plot(x, norm.pdf(x, loc=0, scale=1)) +p = plot(x, norm.pdf(x, loc=0.5, scale=2)) +p = plot(x, norm.pdf(x, loc=-0.5, scale=.5)) +``` + + +![png](output_33_0.png) + + +另一种则是将 `loc, scale` 作为参数直接输给 `norm` 生成相应的分布: + + +```python +p = plot(x, norm(loc=0, scale=1).pdf(x)) +p = plot(x, norm(loc=0.5, scale=2).pdf(x)) +p = plot(x, norm(loc=-0.5, scale=.5).pdf(x)) +``` + + +![png](output_35_0.png) + + +### 其他连续分布 + + +```python +from scipy.stats import lognorm, t, dweibull +``` + +支持与 `norm` 类似的操作,如概率密度函数等。 + +不同参数的[对数正态分布](https://zh.wikipedia.org/wiki/%E5%AF%B9%E6%95%B0%E6%AD%A3%E6%80%81%E5%88%86%E5%B8%83): + + +```python +x = linspace(0.01, 3, 100) + +plot(x, lognorm.pdf(x, 1), label='s=1') +plot(x, lognorm.pdf(x, 2), label='s=2') +plot(x, lognorm.pdf(x, .1), label='s=0.1') + +legend() +``` + + + + + + + + + +![png](output_40_1.png) + + +不同的[韦氏分布](https://zh.wikipedia.org/wiki/%E9%9F%A6%E4%BC%AF%E5%88%86%E5%B8%83): + + +```python +x = linspace(0.01, 3, 100) + +plot(x, dweibull.pdf(x, 1), label='s=1, constant failure rate') +plot(x, dweibull.pdf(x, 2), label='s>1, increasing failure rate') +plot(x, dweibull.pdf(x, .1), label='0<s<1, decreasing failure rate') + +legend() +``` + + + + + + + + + +![png](output_42_1.png) + + +不同自由度的[学生 `t` 分布](https://zh.wikipedia.org/wiki/%E5%AD%A6%E7%94%9Ft-%E5%88%86%E5%B8%83): + + +```python +x = linspace(-3, 3, 100) + +plot(x, t.pdf(x, 1), label='df=1') +plot(x, t.pdf(x, 2), label='df=2') +plot(x, t.pdf(x, 100), label='df=100') +plot(x[::5], norm.pdf(x[::5]), 'kx', label='normal') + +legend() +``` + + + + + + + + + +![png](output_44_1.png) + + +## 离散分布 + +导入离散分布: + + +```python +from scipy.stats import binom, poisson, randint +``` + +离散分布没有概率密度函数,但是有[概率质量函数](https://zh.wikipedia.org/wiki/%E6%A6%82%E7%8E%87%E8%B4%A8%E9%87%8F%E5%87%BD%E6%95%B0)。 + +[离散均匀分布](https://zh.wikipedia.org/wiki/%E9%9B%A2%E6%95%A3%E5%9E%8B%E5%9D%87%E5%8B%BB%E5%88%86%E4%BD%88)的概率质量函数(PMF): + + +```python +high = 10 +low = -10 + +x = arange(low, high+1, 0.5) +p = stem(x, randint(low, high).pmf(x)) # 杆状图 
+``` + + +![png](output_50_0.png) + + +[二项分布](https://zh.wikipedia.org/wiki/%E4%BA%8C%E9%A0%85%E5%88%86%E4%BD%88): + + +```python +num_trials = 60 +x = arange(num_trials) + +plot(x, binom(num_trials, 0.5).pmf(x), 'o-', label='p=0.5') +plot(x, binom(num_trials, 0.2).pmf(x), 'o-', label='p=0.2') + +legend() +``` + + + + + + + + + +![png](output_52_1.png) + + +[泊松分布](https://zh.wikipedia.org/wiki/%E6%B3%8A%E6%9D%BE%E5%88%86%E4%BD%88): + + +```python +x = arange(0,21) + +plot(x, poisson(1).pmf(x), 'o-', label=r'$\lambda$=1') +plot(x, poisson(4).pmf(x), 'o-', label=r'$\lambda$=4') +plot(x, poisson(9).pmf(x), 'o-', label=r'$\lambda$=9') + +legend() +``` + + + + + + + + + +![png](output_54_1.png) + + +## 自定义离散分布 + +导入要用的函数: + + +```python +from scipy.stats import rv_discrete +``` + +一个不均匀的骰子对应的离散值及其概率: + + +```python +xk = [1, 2, 3, 4, 5, 6] +pk = [.3, .35, .25, .05, .025, .025] +``` + +定义离散分布: + + +```python +loaded = rv_discrete(values=(xk, pk)) +``` + +此时, `loaded` 可以当作一个离散分布的模块来使用。 + +产生两个服从该分布的随机变量: + + +```python +loaded.rvs(size=2) +``` + + + + + array([3, 1]) + + + +产生100个随机变量,将直方图与概率质量函数进行比较: + + +```python +samples = loaded.rvs(size=100) +bins = linspace(.5,6.5,7) + +hist(samples, bins=bins, normed=True) +stem(xk, loaded.pmf(xk), markerfmt='ro', linefmt='r-') +``` + + + + + + + + + +![png](output_66_1.png) + + +## 假设检验 + +导入相关的函数: + +- 正态分布 +- 独立双样本 `t` 检验,配对样本 `t` 检验,单样本 `t` 检验 +- 学生 `t` 分布 + +`t` 检验的相关内容请参考: +- 百度百科-`t` 检验:http://baike.baidu.com/view/557340.htm +- 维基百科-学生 `t` 检验:https://en.wikipedia.org/wiki/Student%27s_t-test + + +```python +from scipy.stats import norm +from scipy.stats import ttest_ind, ttest_rel, ttest_1samp +from scipy.stats import t +``` + +### 独立样本 t 检验 + +两组参数不同的正态分布: + + +```python +n1 = norm(loc=0.3, scale=1.0) +n2 = norm(loc=0, scale=1.0) +``` + +从分布中产生两组随机样本: + + +```python +n1_samples = n1.rvs(size=100) +n2_samples = n2.rvs(size=100) +``` + +将两组样本混合在一起: + + +```python +samples = hstack((n1_samples, n2_samples)) +``` + +最大似然参数估计: + 
+ +```python +loc, scale = norm.fit(samples) +n = norm(loc=loc, scale=scale) +``` + +比较: + + +```python +x = linspace(-3,3,100) + +hist([samples, n1_samples, n2_samples], normed=True) +plot(x, n.pdf(x), 'b-') +plot(x, n1.pdf(x), 'g-') +plot(x, n2.pdf(x), 'r-') +``` + + + + + [] + + + + +![png](output_80_1.png) + + +独立双样本 `t` 检验的目的在于判断两组样本之间是否有显著差异: + + +```python +t_val, p = ttest_ind(n1_samples, n2_samples) + +print 't = {}'.format(t_val) +print 'p-value = {}'.format(p) +``` + + t = 0.868384594123 + p-value = 0.386235148899 + + +`p` 值小,说明这两个样本有显著性差异。 + +### 配对样本 t 检验 + +配对样本指的是两组样本之间的元素一一对应,例如,假设我们有一组病人的数据: + + +```python +pop_size = 35 + +pre_treat = norm(loc=0, scale=1) +n0 = pre_treat.rvs(size=pop_size) +``` + +经过某种治疗后,对这组病人得到一组新的数据: + + +```python +effect = norm(loc=0.05, scale=0.2) +eff = effect.rvs(size=pop_size) + +n1 = n0 + eff +``` + +新数据的最大似然估计: + + +```python +loc, scale = norm.fit(n1) +post_treat = norm(loc=loc, scale=scale) +``` + +画图: + + +```python +fig = figure(figsize=(10,4)) + +ax1 = fig.add_subplot(1,2,1) +h = ax1.hist([n0, n1], normed=True) +p = ax1.plot(x, pre_treat.pdf(x), 'b-') +p = ax1.plot(x, post_treat.pdf(x), 'g-') + +ax2 = fig.add_subplot(1,2,2) +h = ax2.hist(eff, normed=True) +``` + + +![png](output_92_0.png) + + +独立 `t` 检验: + + +```python +t_val, p = ttest_ind(n0, n1) + +print 't = {}'.format(t_val) +print 'p-value = {}'.format(p) +``` + + t = -0.347904839913 + p-value = 0.728986322039 + + +高 `p` 值说明两组样本之间没有显著性差异。 + +配对 `t` 检验: + + +```python +t_val, p = ttest_rel(n0, n1) + +print 't = {}'.format(t_val) +print 'p-value = {}'.format(p) +``` + + t = -1.89564459709 + p-value = 0.0665336223673 + + +配对 `t` 检验的结果说明,配对样本之间存在显著性差异,说明治疗是有效的,符合我们的预期。 + +### `p` 值计算原理 + +`p` 值对应的部分是下图中的红色区域,边界范围由 `t` 值决定。 + + +```python +my_t = t(pop_size) # 传入参数为自由度,这里自由度为35 + +p = plot(x, my_t.pdf(x), 'b-') +lower_x = x[x<= -abs(t_val)] +upper_x = x[x>= abs(t_val)] + +p = fill_between(lower_x, my_t.pdf(lower_x), color='red') +p = fill_between(upper_x, 
my_t.pdf(upper_x), color='red') +``` + + +![png](output_100_0.png) + diff --git a/docs/04-scipy/04.04-curve-fitting.md b/docs/04-scipy/04.04-curve-fitting.md new file mode 100644 index 00000000..1ff2de13 --- /dev/null +++ b/docs/04-scipy/04.04-curve-fitting.md @@ -0,0 +1,437 @@ + +# 曲线拟合 + +导入基础包: + + +```python +import numpy as np +import matplotlib as mpl +import matplotlib.pyplot as plt +``` + +## 多项式拟合 + +导入线多项式拟合工具: + + +```python +from numpy import polyfit, poly1d +``` + +产生数据: + + +```python +x = np.linspace(-5, 5, 100) +y = 4 * x + 1.5 +noise_y = y + np.random.randn(y.shape[-1]) * 2.5 +``` + +画出数据: + + +```python +%matplotlib inline + +p = plt.plot(x, noise_y, 'rx') +p = plt.plot(x, y, 'b:') +``` + + +![png](output_9_0.png) + + +进行线性拟合,`polyfit` 是多项式拟合函数,线性拟合即一阶多项式: + + +```python +coeff = polyfit(x, noise_y, 1) +print coeff +``` + + [ 3.93921315 1.59379469] + + +一阶多项式 $y = a_1 x + a_0$ 拟合,返回两个系数 $[a_1, a_0]$。 + +画出拟合曲线: + + +```python +p = plt.plot(x, noise_y, 'rx') +p = plt.plot(x, coeff[0] * x + coeff[1], 'k-') +p = plt.plot(x, y, 'b--') +``` + + +![png](output_13_0.png) + + +还可以用 `poly1d` 生成一个以传入的 `coeff` 为参数的多项式函数: + + +```python +f = poly1d(coeff) +p = plt.plot(x, noise_y, 'rx') +p = plt.plot(x, f(x)) +``` + + +![png](output_15_0.png) + + + +```python +f +``` + + + + + poly1d([ 3.93921315, 1.59379469]) + + + +显示 `f`: + + +```python +print f +``` + + + 3.939 x + 1.594 + + +还可以对它进行数学操作生成新的多项式: + + +```python +print f + 2 * f ** 2 +``` + + 2 + 31.03 x + 29.05 x + 6.674 + + +## 多项式拟合正弦函数 + +正弦函数: + + +```python +x = np.linspace(-np.pi,np.pi,100) +y = np.sin(x) +``` + +用一阶到九阶多项式拟合,类似泰勒展开: + + +```python +y1 = poly1d(polyfit(x,y,1)) +y3 = poly1d(polyfit(x,y,3)) +y5 = poly1d(polyfit(x,y,5)) +y7 = poly1d(polyfit(x,y,7)) +y9 = poly1d(polyfit(x,y,9)) +``` + + +```python +x = np.linspace(-3 * np.pi,3 * np.pi,100) + +p = plt.plot(x, np.sin(x), 'k') +p = plt.plot(x, y1(x)) +p = plt.plot(x, y3(x)) +p = plt.plot(x, y5(x)) +p = plt.plot(x, y7(x)) +p = plt.plot(x, 
y9(x)) + +a = plt.axis([-3 * np.pi, 3 * np.pi, -1.25, 1.25]) +``` + + +![png](output_26_0.png) + + +黑色为原始的图形,可以看到,随着多项式拟合的阶数的增加,曲线与拟合数据的吻合程度在逐渐增大。 + +## 最小二乘拟合 + +导入相关的模块: + + +```python +from scipy.linalg import lstsq +from scipy.stats import linregress +``` + + +```python +x = np.linspace(0,5,100) +y = 0.5 * x + np.random.randn(x.shape[-1]) * 0.35 + +plt.plot(x,y,'x') +``` + + + + + [] + + + + +![png](output_31_1.png) + + +一般来说,当我们使用一个 N-1 阶的多项式拟合这 M 个点时,有这样的关系存在: + +$$XC = Y$$ + +即 + +$$\left[ \begin{matrix} +x_0^{N-1} & \dots & x_0 & 1 \\\ +x_1^{N-1} & \dots & x_1 & 1 \\\ +\dots & \dots & \dots & \dots \\\ +x_M^{N-1} & \dots & x_M & 1 +\end{matrix}\right] +\left[ \begin{matrix} C_{N-1} \\\ \dots \\\ C_1 \\\ C_0 \end{matrix} \right] = +\left[ \begin{matrix} y_0 \\\ y_1 \\\ \dots \\\ y_M \end{matrix} \right]$$ + +### Scipy.linalg.lstsq 最小二乘解 + +要得到 `C` ,可以使用 `scipy.linalg.lstsq` 求最小二乘解。 + +这里,我们使用 1 阶多项式即 `N = 2`,先将 `x` 扩展成 `X`: + + +```python +X = np.hstack((x[:,np.newaxis], np.ones((x.shape[-1],1)))) +X[1:5] +``` + + + + + array([[ 0.05050505, 1. ], + [ 0.1010101 , 1. ], + [ 0.15151515, 1. ], + [ 0.2020202 , 1. 
]]) + + + +求解: + + +```python +C, resid, rank, s = lstsq(X, y) +C, resid, rank, s +``` + + + + + (array([ 0.50432002, 0.0415695 ]), + 12.182942535066523, + 2, + array([ 30.23732043, 4.82146667])) + + + +画图: + + +```python +p = plt.plot(x, y, 'rx') +p = plt.plot(x, C[0] * x + C[1], 'k--') +print "sum squared residual = {:.3f}".format(resid) +print "rank of the X matrix = {}".format(rank) +print "singular values of X = {}".format(s) +``` + + sum squared residual = 12.183 + rank of the X matrix = 2 + singular values of X = [ 30.23732043 4.82146667] + + + +![png](output_39_1.png) + + +### Scipy.stats.linregress 线性回归 + +对于上面的问题,还可以使用线性回归进行求解: + + +```python +slope, intercept, r_value, p_value, stderr = linregress(x, y) +slope, intercept +``` + + + + + (0.50432001884393252, 0.041569499438028901) + + + + +```python +p = plt.plot(x, y, 'rx') +p = plt.plot(x, slope * x + intercept, 'k--') +print "R-value = {:.3f}".format(r_value) +print "p-value (probability there is no correlation) = {:.3e}".format(p_value) +print "Root mean squared error of the fit = {:.3f}".format(np.sqrt(stderr)) +``` + + R-value = 0.903 + p-value (probability there is no correlation) = 8.225e-38 + Root mean squared error of the fit = 0.156 + + + +![png](output_43_1.png) + + +可以看到,两者求解的结果是一致的,但是出发的角度是不同的。 + +## 更高级的拟合 + + +```python +from scipy.optimize import leastsq +``` + +先定义这个非线性函数:$y = a e^{-b sin( f x + \phi)}$ + + +```python +def function(x, a , b, f, phi): + """a function of x with four parameters""" + result = a * np.exp(-b * np.sin(f * x + phi)) + return result +``` + +画出原始曲线: + + +```python +x = np.linspace(0, 2 * np.pi, 50) +actual_parameters = [3, 2, 1.25, np.pi / 4] +y = function(x, *actual_parameters) +p = plt.plot(x,y) +``` + + +![png](output_50_0.png) + + +加入噪声: + + +```python +from scipy.stats import norm +y_noisy = y + 0.8 * norm.rvs(size=len(x)) +p = plt.plot(x, y, 'k-') +p = plt.plot(x, y_noisy, 'rx') +``` + + +![png](output_52_0.png) + + +### Scipy.optimize.leastsq + 
+定义误差函数,将要优化的参数放在前面: + + +```python +def f_err(p, y, x): + return y - function(x, *p) +``` + +将这个函数作为参数传入 `leastsq` 函数,第二个参数为初始值: + + +```python +c, ret_val = leastsq(f_err, [1, 1, 1, 1], args=(y_noisy, x)) +c, ret_val +``` + + + + + (array([ 3.03199715, 1.97689384, 1.30083191, 0.6393337 ]), 1) + + + +`ret_val` 是 1~4 时,表示成功找到最小二乘解: + + +```python +p = plt.plot(x, y_noisy, 'rx') +p = plt.plot(x, function(x, *c), 'k--') +``` + + +![png](output_59_0.png) + + +### Scipy.optimize.curve_fit + +更高级的做法: + + +```python +from scipy.optimize import curve_fit +``` + +不需要定义误差函数,直接传入 `function` 作为参数: + + +```python +p_est, err_est = curve_fit(function, x, y_noisy) +``` + + +```python +print p_est +p = plt.plot(x, y_noisy, "rx") +p = plt.plot(x, function(x, *p_est), "k--") +``` + + [ 3.03199711 1.97689385 1.3008319 0.63933373] + + + +![png](output_65_1.png) + + +这里第一个返回的是函数的参数,第二个返回值为各个参数的协方差矩阵: + + +```python +print err_est +``` + + [[ 0.08483704 -0.02782318 0.00967093 -0.03029038] + [-0.02782318 0.00933216 -0.00305158 0.00955794] + [ 0.00967093 -0.00305158 0.0014972 -0.00468919] + [-0.03029038 0.00955794 -0.00468919 0.01484297]] + + +协方差矩阵的对角线为各个参数的方差: + + +```python +print "normalized relative errors for each parameter" +print " a\t b\t f\tphi" +print np.sqrt(err_est.diagonal()) / p_est +``` + + normalized relative errors for each parameter + a b f phi + [ 0.09606473 0.0488661 0.02974528 0.19056043] + diff --git a/docs/04-scipy/04.05-minimization-in-python.md b/docs/04-scipy/04.05-minimization-in-python.md new file mode 100644 index 00000000..addd2150 --- /dev/null +++ b/docs/04-scipy/04.05-minimization-in-python.md @@ -0,0 +1,611 @@ + +# 最小化函数 + +## minimize 函数 + + +```python +%pylab inline +set_printoptions(precision=3, suppress=True) +``` + + Populating the interactive namespace from numpy and matplotlib + + +已知斜抛运动的水平飞行距离公式: + +$d = 2 \frac{v_0^2}{g} \sin(\theta) \cos (\theta)$ + +- $d$ 水平飞行距离 +- $v_0$ 初速度大小 +- $g$ 重力加速度 +- $\theta$ 抛出角度 + +希望找到使 $d$ 最大的角度 $\theta$。 + 
+定义距离函数: + + +```python +def dist(theta, v0): + """calculate the distance travelled by a projectile launched + at theta degrees with v0 (m/s) initial velocity. + """ + g = 9.8 + theta_rad = pi * theta / 180 + return 2 * v0 ** 2 / g * sin(theta_rad) * cos(theta_rad) +theta = linspace(0,90,90) +p = plot(theta, dist(theta, 1.)) +xl = xlabel(r'launch angle $\theta (^{\circ})$') +yl = ylabel('horizontal distance traveled') +``` + + +![png](output_4_0.png) + + +因为 `Scipy` 提供的是最小化方法,所以最大化距离就相当于最小化距离的负数: + + +```python +def neg_dist(theta, v0): + return -1 * dist(theta, v0) +``` + +导入 `scipy.optimize.minimize`: + + +```python +from scipy.optimize import minimize +result = minimize(neg_dist, 40, args=(1,)) +print "optimal angle = {:.1f} degrees".format(result.x[0]) +``` + + optimal angle = 45.0 degrees + + +`minimize` 接受三个参数:第一个是要优化的函数,第二个是初始猜测值,第三个则是优化函数的附加参数,默认 `minimize` 将优化函数的第一个参数作为优化变量,所以第三个参数输入的附加参数从优化函数的第二个参数开始。 + +查看返回结果: + + +```python +print result +``` + + status: 0 + success: True + njev: 18 + nfev: 54 + hess_inv: array([[ 8110.515]]) + fun: -0.10204079220645729 + x: array([ 45.02]) + message: 'Optimization terminated successfully.' 
+ jac: array([ 0.]) + + +## Rosenbrock 函数 + +Rosenbrock 函数是一个用来测试优化函数效果的一个非凸函数: + +$f(x)=\sum\limits_{i=1}^{N-1}{100\left(x_{i+1} - x_i^2\right)^2 + \left(1-x_{i}\right)^2 }$ + +导入该函数: + + +```python +from scipy.optimize import rosen +from mpl_toolkits.mplot3d import Axes3D +``` + +使用 `N = 2` 的 Rosenbrock 函数: + + +```python +x, y = meshgrid(np.linspace(-2,2,25), np.linspace(-0.5,3.5,25)) +z = rosen([x,y]) +``` + +图像和最低点 `(1,1)`: + + +```python +fig = figure(figsize=(12,5.5)) +ax = fig.gca(projection="3d") +ax.azim = 70; ax.elev = 48 +ax.set_xlabel("X"); ax.set_ylabel("Y") +ax.set_zlim((0,1000)) +p = ax.plot_surface(x,y,z,rstride=1, cstride=1, cmap=cm.jet) +rosen_min = ax.plot([1],[1],[0],"ro") +``` + + +![png](output_17_0.png) + + +传入初始值: + + +```python +x0 = [1.3, 1.6, -0.5, -1.8, 0.8] +result = minimize(rosen, x0) +print result.x +``` + + [ 1. 1. 1. 1. 1.] + + +随机给定初始值: + + +```python +x0 = np.random.randn(10) +result = minimize(rosen, x0) +print x0 +print result.x +``` + + [ 0.815 -2.086 0.297 1.079 -0.528 0.461 -0.13 -0.715 0.734 0.621] + [-0.993 0.997 0.998 0.999 0.999 0.999 0.998 0.997 0.994 0.988] + + +对于 `N > 3`,函数的最小值为 $(x_1,x_2, ..., x_N) = (1,1,...,1)$,不过有一个局部极小值点 $(x_1,x_2, ..., x_N) = (-1,1,...,1)$,所以随机初始值如果选的不好的话,有可能返回的结果是局部极小值点: + +## 优化方法 + +### BFGS 算法 + +`minimize` 函数默认根据问题是否有界或者有约束,使用 `'BFGS', 'L-BFGS-B', 'SLSQP'` 中的一种。 + +可以查看帮助来得到更多的信息: + + +```python +info(minimize) +``` + + minimize(fun, x0, args=(), method=None, jac=None, hess=None, hessp=None, + bounds=None, constraints=(), tol=None, callback=None, options=None) + + Minimization of scalar function of one or more variables. + + Parameters + ---------- + fun : callable + Objective function. + x0 : ndarray + Initial guess. + args : tuple, optional + Extra arguments passed to the objective function and its + derivatives (Jacobian, Hessian). + method : str or callable, optional + Type of solver. 
Should be one of + + - 'Nelder-Mead' + - 'Powell' + - 'CG' + - 'BFGS' + - 'Newton-CG' + - 'Anneal (deprecated as of scipy version 0.14.0)' + - 'L-BFGS-B' + - 'TNC' + - 'COBYLA' + - 'SLSQP' + - 'dogleg' + - 'trust-ncg' + - custom - a callable object (added in version 0.14.0) + + If not given, chosen to be one of ``BFGS``, ``L-BFGS-B``, ``SLSQP``, + depending if the problem has constraints or bounds. + jac : bool or callable, optional + Jacobian (gradient) of objective function. Only for CG, BFGS, + Newton-CG, L-BFGS-B, TNC, SLSQP, dogleg, trust-ncg. + If `jac` is a Boolean and is True, `fun` is assumed to return the + gradient along with the objective function. If False, the + gradient will be estimated numerically. + `jac` can also be a callable returning the gradient of the + objective. In this case, it must accept the same arguments as `fun`. + hess, hessp : callable, optional + Hessian (matrix of second-order derivatives) of objective function or + Hessian of objective function times an arbitrary vector p. Only for + Newton-CG, dogleg, trust-ncg. + Only one of `hessp` or `hess` needs to be given. If `hess` is + provided, then `hessp` will be ignored. If neither `hess` nor + `hessp` is provided, then the Hessian product will be approximated + using finite differences on `jac`. `hessp` must compute the Hessian + times an arbitrary vector. + bounds : sequence, optional + Bounds for variables (only for L-BFGS-B, TNC and SLSQP). + ``(min, max)`` pairs for each element in ``x``, defining + the bounds on that parameter. Use None for one of ``min`` or + ``max`` when there is no bound in that direction. + constraints : dict or sequence of dict, optional + Constraints definition (only for COBYLA and SLSQP). + Each constraint is defined in a dictionary with fields: + type : str + Constraint type: 'eq' for equality, 'ineq' for inequality. + fun : callable + The function defining the constraint. + jac : callable, optional + The Jacobian of `fun` (only for SLSQP). 
+ args : sequence, optional + Extra arguments to be passed to the function and Jacobian. + Equality constraint means that the constraint function result is to + be zero whereas inequality means that it is to be non-negative. + Note that COBYLA only supports inequality constraints. + tol : float, optional + Tolerance for termination. For detailed control, use solver-specific + options. + options : dict, optional + A dictionary of solver options. All methods accept the following + generic options: + maxiter : int + Maximum number of iterations to perform. + disp : bool + Set to True to print convergence messages. + For method-specific options, see :func:`show_options()`. + callback : callable, optional + Called after each iteration, as ``callback(xk)``, where ``xk`` is the + current parameter vector. + + Returns + ------- + res : OptimizeResult + The optimization result represented as a ``OptimizeResult`` object. + Important attributes are: ``x`` the solution array, ``success`` a + Boolean flag indicating if the optimizer exited successfully and + ``message`` which describes the cause of the termination. See + `OptimizeResult` for a description of other attributes. + + + See also + -------- + minimize_scalar : Interface to minimization algorithms for scalar + univariate functions + show_options : Additional options accepted by the solvers + + Notes + ----- + This section describes the available solvers that can be selected by the + 'method' parameter. The default method is *BFGS*. + + **Unconstrained minimization** + + Method *Nelder-Mead* uses the Simplex algorithm [1]_, [2]_. This + algorithm has been successful in many applications but other algorithms + using the first and/or second derivatives information might be preferred + for their better performances and robustness in general. + + Method *Powell* is a modification of Powell's method [3]_, [4]_ which + is a conjugate direction method. 
It performs sequential one-dimensional + minimizations along each vector of the directions set (`direc` field in + `options` and `info`), which is updated at each iteration of the main + minimization loop. The function need not be differentiable, and no + derivatives are taken. + + Method *CG* uses a nonlinear conjugate gradient algorithm by Polak and + Ribiere, a variant of the Fletcher-Reeves method described in [5]_ pp. + 120-122. Only the first derivatives are used. + + Method *BFGS* uses the quasi-Newton method of Broyden, Fletcher, + Goldfarb, and Shanno (BFGS) [5]_ pp. 136. It uses the first derivatives + only. BFGS has proven good performance even for non-smooth + optimizations. This method also returns an approximation of the Hessian + inverse, stored as `hess_inv` in the OptimizeResult object. + + Method *Newton-CG* uses a Newton-CG algorithm [5]_ pp. 168 (also known + as the truncated Newton method). It uses a CG method to the compute the + search direction. See also *TNC* method for a box-constrained + minimization with a similar algorithm. + + Method *Anneal* uses simulated annealing, which is a probabilistic + metaheuristic algorithm for global optimization. It uses no derivative + information from the function being optimized. + + Method *dogleg* uses the dog-leg trust-region algorithm [5]_ + for unconstrained minimization. This algorithm requires the gradient + and Hessian; furthermore the Hessian is required to be positive definite. + + Method *trust-ncg* uses the Newton conjugate gradient trust-region + algorithm [5]_ for unconstrained minimization. This algorithm requires + the gradient and either the Hessian or a function that computes the + product of the Hessian with a given vector. + + **Constrained minimization** + + Method *L-BFGS-B* uses the L-BFGS-B algorithm [6]_, [7]_ for bound + constrained minimization. + + Method *TNC* uses a truncated Newton algorithm [5]_, [8]_ to minimize a + function with variables subject to bounds. 
This algorithm uses + gradient information; it is also called Newton Conjugate-Gradient. It + differs from the *Newton-CG* method described above as it wraps a C + implementation and allows each variable to be given upper and lower + bounds. + + Method *COBYLA* uses the Constrained Optimization BY Linear + Approximation (COBYLA) method [9]_, [10]_, [11]_. The algorithm is + based on linear approximations to the objective function and each + constraint. The method wraps a FORTRAN implementation of the algorithm. + + Method *SLSQP* uses Sequential Least SQuares Programming to minimize a + function of several variables with any combination of bounds, equality + and inequality constraints. The method wraps the SLSQP Optimization + subroutine originally implemented by Dieter Kraft [12]_. Note that the + wrapper handles infinite values in bounds by converting them into large + floating values. + + **Custom minimizers** + + It may be useful to pass a custom minimization method, for example + when using a frontend to this method such as `scipy.optimize.basinhopping` + or a different library. You can simply pass a callable as the ``method`` + parameter. + + The callable is called as ``method(fun, x0, args, **kwargs, **options)`` + where ``kwargs`` corresponds to any other parameters passed to `minimize` + (such as `callback`, `hess`, etc.), except the `options` dict, which has + its contents also passed as `method` parameters pair by pair. Also, if + `jac` has been passed as a bool type, `jac` and `fun` are mangled so that + `fun` returns just the function values and `jac` is converted to a function + returning the Jacobian. The method shall return an ``OptimizeResult`` + object. + + The provided `method` callable must be able to accept (and possibly ignore) + arbitrary parameters; the set of parameters accepted by `minimize` may + expand in future versions and then these parameters will be passed to + the method. You can find an example in the scipy.optimize tutorial. 
+ + .. versionadded:: 0.11.0 + + References + ---------- + .. [1] Nelder, J A, and R Mead. 1965. A Simplex Method for Function + Minimization. The Computer Journal 7: 308-13. + .. [2] Wright M H. 1996. Direct search methods: Once scorned, now + respectable, in Numerical Analysis 1995: Proceedings of the 1995 + Dundee Biennial Conference in Numerical Analysis (Eds. D F + Griffiths and G A Watson). Addison Wesley Longman, Harlow, UK. + 191-208. + .. [3] Powell, M J D. 1964. An efficient method for finding the minimum of + a function of several variables without calculating derivatives. The + Computer Journal 7: 155-162. + .. [4] Press W, S A Teukolsky, W T Vetterling and B P Flannery. + Numerical Recipes (any edition), Cambridge University Press. + .. [5] Nocedal, J, and S J Wright. 2006. Numerical Optimization. + Springer New York. + .. [6] Byrd, R H and P Lu and J. Nocedal. 1995. A Limited Memory + Algorithm for Bound Constrained Optimization. SIAM Journal on + Scientific and Statistical Computing 16 (5): 1190-1208. + .. [7] Zhu, C and R H Byrd and J Nocedal. 1997. L-BFGS-B: Algorithm + 778: L-BFGS-B, FORTRAN routines for large scale bound constrained + optimization. ACM Transactions on Mathematical Software 23 (4): + 550-560. + .. [8] Nash, S G. Newton-Type Minimization Via the Lanczos Method. + 1984. SIAM Journal of Numerical Analysis 21: 770-778. + .. [9] Powell, M J D. A direct search optimization method that models + the objective and constraint functions by linear interpolation. + 1994. Advances in Optimization and Numerical Analysis, eds. S. Gomez + and J-P Hennart, Kluwer Academic (Dordrecht), 51-67. + .. [10] Powell M J D. Direct search algorithms for optimization + calculations. 1998. Acta Numerica 7: 287-336. + .. [11] Powell M J D. A view of algorithms for optimization without + derivatives. 2007.Cambridge University Technical Report DAMTP + 2007/NA03 + .. [12] Kraft, D. A software package for sequential quadratic + programming. 1988. Tech. Rep. 
DFVLR-FB 88-28, DLR German Aerospace + Center -- Institute for Flight Mechanics, Koln, Germany. + + Examples + -------- + Let us consider the problem of minimizing the Rosenbrock function. This + function (and its respective derivatives) is implemented in `rosen` + (resp. `rosen_der`, `rosen_hess`) in the `scipy.optimize`. + + >>> from scipy.optimize import minimize, rosen, rosen_der + + A simple application of the *Nelder-Mead* method is: + + >>> x0 = [1.3, 0.7, 0.8, 1.9, 1.2] + >>> res = minimize(rosen, x0, method='Nelder-Mead') + >>> res.x + [ 1. 1. 1. 1. 1.] + + Now using the *BFGS* algorithm, using the first derivative and a few + options: + + >>> res = minimize(rosen, x0, method='BFGS', jac=rosen_der, + ... options={'gtol': 1e-6, 'disp': True}) + Optimization terminated successfully. + Current function value: 0.000000 + Iterations: 52 + Function evaluations: 64 + Gradient evaluations: 64 + >>> res.x + [ 1. 1. 1. 1. 1.] + >>> print res.message + Optimization terminated successfully. + >>> res.hess + [[ 0.00749589 0.01255155 0.02396251 0.04750988 0.09495377] + [ 0.01255155 0.02510441 0.04794055 0.09502834 0.18996269] + [ 0.02396251 0.04794055 0.09631614 0.19092151 0.38165151] + [ 0.04750988 0.09502834 0.19092151 0.38341252 0.7664427 ] + [ 0.09495377 0.18996269 0.38165151 0.7664427 1.53713523]] + + + Next, consider a minimization problem with several constraints (namely + Example 16.4 from [5]_). The objective function is: + + >>> fun = lambda x: (x[0] - 1)**2 + (x[1] - 2.5)**2 + + There are three constraints defined as: + + >>> cons = ({'type': 'ineq', 'fun': lambda x: x[0] - 2 * x[1] + 2}, + ... {'type': 'ineq', 'fun': lambda x: -x[0] - 2 * x[1] + 6}, + ... {'type': 'ineq', 'fun': lambda x: -x[0] + 2 * x[1] + 2}) + + And variables must be positive, hence the following bounds: + + >>> bnds = ((0, None), (0, None)) + + The optimization problem is solved using the SLSQP method as: + + >>> res = minimize(fun, (2, 0), method='SLSQP', bounds=bnds, + ... 
constraints=cons) + + It should converge to the theoretical solution (1.4 ,1.7). + + +默认没有约束时,使用的是 [BFGS 方法](https://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm)。 + +利用 `callback` 参数查看迭代的历史: + + +```python +x0 = [-1.5, 4.5] +xi = [x0] +result = minimize(rosen, x0, callback=xi.append) +xi = np.asarray(xi) +print xi.shape +print result.x +print "in {} function evaluations.".format(result.nfev) +``` + + (37L, 2L) + [ 1. 1.] + in 200 function evaluations. + + +绘图显示轨迹: + + +```python +x, y = meshgrid(np.linspace(-2.3,1.75,25), np.linspace(-0.5,4.5,25)) +z = rosen([x,y]) +fig = figure(figsize=(12,5.5)) +ax = fig.gca(projection="3d"); ax.azim = 70; ax.elev = 75 +ax.set_xlabel("X"); ax.set_ylabel("Y"); ax.set_zlim((0,1000)) +p = ax.plot_surface(x,y,z,rstride=1, cstride=1, cmap=cm.jet) +intermed = ax.plot(xi[:,0], xi[:,1], rosen(xi.T), "g-o") +rosen_min = ax.plot([1],[1],[0],"ro") +``` + + +![png](output_30_0.png) + + +`BFGS` 需要计算函数的 Jacobian 矩阵: + +给定 $\left[y_1,y_2,y_3\right] = f(x_0, x_1, x_2)$ + +$$J=\left[ \begin{matrix} \frac{\partial y_1}{\partial x_0} & \frac{\partial y_1}{\partial x_1} & \frac{\partial y_1}{\partial x_2} \\\ \frac{\partial y_2}{\partial x_0} & \frac{\partial y_2}{\partial x_1} & \frac{\partial y_2}{\partial x_2} \\\ \frac{\partial y_3}{\partial x_0} & \frac{\partial y_3}{\partial x_1} & \frac{\partial y_3}{\partial x_2} \end{matrix} \right]$$ + +在我们的例子中 + +$$J= \left[ \begin{matrix}\frac{\partial rosen}{\partial x_0} & \frac{\partial rosen}{\partial x_1} \end{matrix} \right] $$ + +导入 `rosen` 函数的 `Jacobian` 函数 `rosen_der`: + + +```python +from scipy.optimize import rosen_der +``` + +此时,我们将 `Jacobian` 矩阵作为参数传入: + + +```python +xi = [x0] +result = minimize(rosen, x0, jac=rosen_der, callback=xi.append) +xi = np.asarray(xi) +print xi.shape +print "in {} function evaluations and {} jacobian evaluations.".format(result.nfev, result.njev) +``` + + (38L, 2L) + in 49 function evaluations and 49 jacobian 
evaluations. + + +可以看到,函数计算的开销大约减少了一半,迭代路径与上面的基本吻合: + + +```python +x, y = meshgrid(np.linspace(-2.3,1.75,25), np.linspace(-0.5,4.5,25)) +z = rosen([x,y]) +fig = figure(figsize=(12,5.5)) +ax = fig.gca(projection="3d"); ax.azim = 70; ax.elev = 75 +ax.set_xlabel("X"); ax.set_ylabel("Y"); ax.set_zlim((0,1000)) +p = ax.plot_surface(x,y,z,rstride=1, cstride=1, cmap=cm.jet) +intermed = ax.plot(xi[:,0], xi[:,1], rosen(xi.T), "g-o") +rosen_min = ax.plot([1],[1],[0],"ro") +``` + + +![png](output_36_0.png) + + +## Nelder-Mead Simplex 算法 + +改变 `minimize` 使用的算法,使用 [Nelder–Mead 单纯形算法](https://en.wikipedia.org/wiki/Nelder%E2%80%93Mead_method): + + +```python +xi = [x0] +result = minimize(rosen, x0, method="nelder-mead", callback = xi.append) +xi = np.asarray(xi) +print xi.shape +print "Solved the Nelder-Mead Simplex method with {} function evaluations.".format(result.nfev) +``` + + (120L, 2L) + Solved the Nelder-Mead Simplex method with 226 function evaluations. + + + +```python +x, y = meshgrid(np.linspace(-1.9,1.75,25), np.linspace(-0.5,4.5,25)) +z = rosen([x,y]) +fig = figure(figsize=(12,5.5)) +ax = fig.gca(projection="3d"); ax.azim = 70; ax.elev = 75 +ax.set_xlabel("X"); ax.set_ylabel("Y"); ax.set_zlim((0,1000)) +p = ax.plot_surface(x,y,z,rstride=1, cstride=1, cmap=cm.jet) +intermed = ax.plot(xi[:,0], xi[:,1], rosen(xi.T), "g-o") +rosen_min = ax.plot([1],[1],[0],"ro") +``` + + +![png](output_40_0.png) + + +### Powell 算法 + +使用 [Powell 算法](https://en.wikipedia.org/wiki/Powell%27s_method) + + +```python +xi = [x0] +result = minimize(rosen, x0, method="powell", callback=xi.append) +xi = np.asarray(xi) +print xi.shape +print "Solved Powell's method with {} function evaluations.".format(result.nfev) +``` + + (31L, 2L) + Solved Powell's method with 855 function evaluations. 
+ + + +```python +x, y = meshgrid(np.linspace(-2.3,1.75,25), np.linspace(-0.5,4.5,25)) +z = rosen([x,y]) +fig = figure(figsize=(12,5.5)) +ax = fig.gca(projection="3d"); ax.azim = 70; ax.elev = 75 +ax.set_xlabel("X"); ax.set_ylabel("Y"); ax.set_zlim((0,1000)) +p = ax.plot_surface(x,y,z,rstride=1, cstride=1, cmap=cm.jet) +intermed = ax.plot(xi[:,0], xi[:,1], rosen(xi.T), "g-o") +rosen_min = ax.plot([1],[1],[0],"ro") +``` + + +![png](output_44_0.png) + diff --git a/docs/04-scipy/04.06-integration-in-python.md b/docs/04-scipy/04.06-integration-in-python.md new file mode 100644 index 00000000..bc9e95de --- /dev/null +++ b/docs/04-scipy/04.06-integration-in-python.md @@ -0,0 +1,649 @@ + +# 积分 + +## 符号积分 + +积分与求导的关系: + +$$\frac{d}{dx} F(x) = f(x) +\Rightarrow F(x) = \int f(x) dx$$ + +符号运算可以用 `sympy` 模块完成。 + +先导入 `init_printing` 模块方便其显示: + + +```python +from sympy import init_printing +init_printing() +``` + + +```python +from sympy import symbols, integrate +import sympy +``` + +产生 x 和 y 两个符号变量,并进行运算: + + +```python +x, y = symbols('x y') +sympy.sqrt(x ** 2 + y ** 2) +``` + + + + +$$\sqrt{x^{2} + y^{2}}$$ + + + +对于生成的符号变量 `z`,我们将其中的 `x` 利用 `subs` 方法替换为 `3`: + + +```python +z = sympy.sqrt(x ** 2 + y ** 2) +z.subs(x, 3) +``` + + + + +$$\sqrt{y^{2} + 9}$$ + + + +再替换 `y`: + + +```python +z.subs(x, 3).subs(y, 4) +``` + + + + +$$5$$ + + + +还可以从 `sympy.abc` 中导入现成的符号变量: + + +```python +from sympy.abc import theta +y = sympy.sin(theta) ** 2 +y +``` + + + + +$$\sin^{2}{\left (\theta \right )}$$ + + + +对 y 进行积分: + + +```python +Y = integrate(y) +Y +``` + + + + +$$\frac{\theta}{2} - \frac{1}{2} \sin{\left (\theta \right )} \cos{\left (\theta \right )}$$ + + + +计算 $Y(\pi) - Y(0)$: + + +```python +import numpy as np +np.set_printoptions(precision=3) + +Y.subs(theta, np.pi) - Y.subs(theta, 0) +``` + + + + +$$1.5707963267949$$ + + + +计算 $\int_0^\pi y d\theta$ : + + +```python +integrate(y, (theta, 0, sympy.pi)) +``` + + + + +$$\frac{\pi}{2}$$ + + + +显示的是字符表达式,查看具体数值可以使用 `evalf()` 方法,或者传入 
`numpy.pi`,而不是 `sympy.pi` : + + +```python +integrate(y, (theta, 0, sympy.pi)).evalf() +``` + + + + +$$1.5707963267949$$ + + + + +```python +integrate(y, (theta, 0, np.pi)) +``` + + + + +$$1.5707963267949$$ + + + +根据牛顿莱布尼兹公式,这两个数值应该相等。 + +产生不定积分对象: + + +```python +Y_indef = sympy.Integral(y) +Y_indef +``` + + + + +$$\int \sin^{2}{\left (\theta \right )}\, d\theta$$ + + + + +```python +print type(Y_indef) +``` + + + + +定积分: + + +```python +Y_def = sympy.Integral(y, (theta, 0, sympy.pi)) +Y_def +``` + + + + +$$\int_{0}^{\pi} \sin^{2}{\left (\theta \right )}\, d\theta$$ + + + +产生函数 $Y(x) = \int_0^x sin^2(\theta) d\theta$,并将其向量化: + + +```python +Y_raw = lambda x: integrate(y, (theta, 0, x)) +Y = np.vectorize(Y_raw) +``` + + +```python +%matplotlib inline +import matplotlib.pyplot as plt + +x = np.linspace(0, 2 * np.pi) +p = plt.plot(x, Y(x)) +t = plt.title(r'$Y(x) = \int_0^x sin^2(\theta) d\theta$') +``` + + +![png](output_29_0.png) + + +## 数值积分 + +数值积分: + +$$F(x) = \lim_{n \rightarrow \infty} \sum_{i=0}^{n-1} f(x_i)(x_{i+1}-x_i) +\Rightarrow F(x) = \int_{x_0}^{x_n} f(x) dx$$ + +导入贝塞尔函数: + + +```python +from scipy.special import jv +``` + + +```python +def f(x): + return jv(2.5, x) +``` + + +```python +x = np.linspace(0, 10) +p = plt.plot(x, f(x), 'k-') +``` + + +![png](output_34_0.png) + + +### `quad` 函数 + +Quadrature 积分的原理参见: + +http://en.wikipedia.org/wiki/Numerical_integration#Quadrature_rules_based_on_interpolating_functions + +quad 返回一个 (积分值,误差) 组成的元组: + + +```python +from scipy.integrate import quad +interval = [0, 6.5] +value, max_err = quad(f, *interval) +``` + +积分值: + + +```python +print value +``` + + 1.28474297234 + + +最大误差: + + +```python +print max_err +``` + + 2.34181853668e-09 + + +积分区间图示,蓝色为正,红色为负: + + +```python +print "integral = {:.9f}".format(value) +print "upper bound on error: {:.2e}".format(max_err) +x = np.linspace(0, 10, 100) +p = plt.plot(x, f(x), 'k-') +x = np.linspace(0, 6.5, 45) +p = plt.fill_between(x, f(x), where=f(x)>0, color="blue") +p 
= plt.fill_between(x, f(x), where=f(x)<0, color="red", interpolate=True) +``` + + integral = 1.284742972 + upper bound on error: 2.34e-09 + + + +![png](output_43_1.png) + + +### 积分到无穷 + + +```python +from numpy import inf +interval = [0., inf] + +def g(x): + return np.exp(-x ** 1/2) +``` + + +```python +value, max_err = quad(g, *interval) +x = np.linspace(0, 10, 50) +fig = plt.figure(figsize=(10,3)) +p = plt.plot(x, g(x), 'k-') +p = plt.fill_between(x, g(x)) +plt.annotate(r"$\int_0^{\infty}e^{-x^1/2}dx = $" + "{}".format(value), (4, 0.6), + fontsize=16) +print "upper bound on error: {:.1e}".format(max_err) +``` + + upper bound on error: 7.2e-11 + + + +![png](output_46_1.png) + + +### 双重积分 + +假设我们要进行如下的积分: + +$$ I_n = \int \limits_0^{\infty} \int \limits_1^{\infty} \frac{e^{-xt}}{t^n}dt dx = \frac{1}{n}$$ + + +```python +def h(x, t, n): + """core function, takes x, t, n""" + return np.exp(-x * t) / (t ** n) +``` + +一种方式是调用两次 `quad` 函数,不过这里 `quad` 的返回值不能向量化,所以使用了修饰符 `vectorize` 将其向量化: + + +```python +from numpy import vectorize +@vectorize +def int_h_dx(t, n): + """Time integrand of h(x).""" + return quad(h, 0, np.inf, args=(t, n))[0] +``` + + +```python +@vectorize +def I_n(n): + return quad(int_h_dx, 1, np.inf, args=(n)) +``` + + +```python +I_n([0.5, 1.0, 2.0, 5]) +``` + + + + + (array([ 1.97, 1. , 0.5 , 0.2 ]), + array([ 9.804e-13, 1.110e-14, 5.551e-15, 2.220e-15])) + + + +或者直接调用 `dblquad` 函数,并将积分参数传入,传入方式有多种,后传入的先进行积分: + + +```python +from scipy.integrate import dblquad +@vectorize +def I(n): + """Same as I_n, but using the built-in dblquad""" + x_lower = 0 + x_upper = np.inf + return dblquad(h, + lambda t_lower: 1, lambda t_upper: np.inf, + x_lower, x_upper, args=(n,)) +``` + + +```python +I_n([0.5, 1.0, 2.0, 5]) +``` + + + + + (array([ 1.97, 1. 
, 0.5 , 0.2 ]), + array([ 9.804e-13, 1.110e-14, 5.551e-15, 2.220e-15])) + + + +## 采样点积分 + +### trapz 方法 和 simps 方法 + + +```python +from scipy.integrate import trapz, simps +``` + +`sin` 函数, `100` 个采样点和 `5` 个采样点: + + +```python +x_s = np.linspace(0, np.pi, 5) +y_s = np.sin(x_s) +x = np.linspace(0, np.pi, 100) +y = np.sin(x) +``` + + +```python +p = plt.plot(x, y, 'k:') +p = plt.plot(x_s, y_s, 'k+-') +p = plt.fill_between(x_s, y_s, color="gray") +``` + + +![png](output_62_0.png) + + +采用 [trapezoidal 方法](https://en.wikipedia.org/wiki/Trapezoidal_rule) 和 [simpson 方法](https://en.wikipedia.org/wiki/Simpson%27s_rule) 对这些采样点进行积分(函数积分为 2): + + +```python +result_s = trapz(y_s, x_s) +result_s_s = simps(y_s, x_s) +result = trapz(y, x) +print "Trapezoidal Integration over 5 points : {:.3f}".format(result_s) +print "Simpson Integration over 5 points : {:.3f}".format(result_s_s) +print "Trapezoidal Integration over 100 points : {:.3f}".format(result) +``` + + Trapezoidal Integration over 5 points : 1.896 + Simpson Integration over 5 points : 2.005 + Trapezoidal Integration over 100 points : 2.000 + + +### 使用 ufunc 进行积分 + +`Numpy` 中有很多 `ufunc` 对象: + + +```python +type(np.add) +``` + + + + + numpy.ufunc + + + + +```python +np.info(np.add.accumulate) +``` + + accumulate(array, axis=0, dtype=None, out=None) + + Accumulate the result of applying the operator to all elements. + + For a one-dimensional array, accumulate produces results equivalent to:: + + r = np.empty(len(A)) + t = op.identity # op = the ufunc being applied to A's elements + for i in range(len(A)): + t = op(t, A[i]) + r[i] = t + return r + + For example, add.accumulate() is equivalent to np.cumsum(). + + For a multi-dimensional array, accumulate is applied along only one + axis (axis zero by default; see Examples below) so repeated use is + necessary if one wants to accumulate over multiple axes. + + Parameters + ---------- + array : array_like + The array to act on. 
+ axis : int, optional + The axis along which to apply the accumulation; default is zero. + dtype : data-type code, optional + The data-type used to represent the intermediate results. Defaults + to the data-type of the output array if such is provided, or the + the data-type of the input array if no output array is provided. + out : ndarray, optional + A location into which the result is stored. If not provided a + freshly-allocated array is returned. + + Returns + ------- + r : ndarray + The accumulated values. If `out` was supplied, `r` is a reference to + `out`. + + Examples + -------- + 1-D array examples: + + >>> np.add.accumulate([2, 3, 5]) + array([ 2, 5, 10]) + >>> np.multiply.accumulate([2, 3, 5]) + array([ 2, 6, 30]) + + 2-D array examples: + + >>> I = np.eye(2) + >>> I + array([[ 1., 0.], + [ 0., 1.]]) + + Accumulate along axis 0 (rows), down columns: + + >>> np.add.accumulate(I, 0) + array([[ 1., 0.], + [ 1., 1.]]) + >>> np.add.accumulate(I) # no axis specified = axis zero + array([[ 1., 0.], + [ 1., 1.]]) + + Accumulate along axis 1 (columns), through rows: + + >>> np.add.accumulate(I, 1) + array([[ 1., 1.], + [ 0., 1.]]) + + +`np.add.accumulate` 相当于 `cumsum` : + + +```python +result_np = np.add.accumulate(y) * (x[1] - x[0]) - (x[1] - x[0]) / 2 +``` + + +```python +p = plt.plot(x, - np.cos(x) + np.cos(0), 'rx') +p = plt.plot(x, result_np) +``` + + +![png](output_71_0.png) + + +### 速度比较 + +计算积分:$$\int_0^x sin \theta d\theta$$ + + +```python +import sympy +from sympy.abc import x, theta +sympy_x = x +``` + + +```python +x = np.linspace(0, 20 * np.pi, 1e+4) +y = np.sin(x) +sympy_y = vectorize(lambda x: sympy.integrate(sympy.sin(theta), (theta, 0, x))) +``` + +`numpy` 方法: + + +```python +%timeit np.add.accumulate(y) * (x[1] - x[0]) +y0 = np.add.accumulate(y) * (x[1] - x[0]) +print y0[-1] +``` + + The slowest run took 4.32 times longer than the fastest. 
This could mean that an intermediate result is being cached + 10000 loops, best of 3: 56.2 µs per loop + -2.34138044756e-17 + + +`quad` 方法: + + +```python +%timeit quad(np.sin, 0, 20 * np.pi) +y2 = quad(np.sin, 0, 20 * np.pi, full_output=True) +print "result = ", y2[0] +print "number of evaluations", y2[-1]['neval'] +``` + + 10000 loops, best of 3: 40.5 µs per loop + result = 3.43781337153e-15 + number of evaluations 21 + + +`trapz` 方法: + + +```python +%timeit trapz(y, x) +y1 = trapz(y, x) +print y1 +``` + + 10000 loops, best of 3: 105 µs per loop + -4.4408920985e-16 + + +`simps` 方法: + + +```python +%timeit simps(y, x) +y3 = simps(y, x) +print y3 +``` + + 1000 loops, best of 3: 801 µs per loop + 3.28428554968e-16 + + +`sympy` 积分方法: + + +```python +%timeit sympy_y(20 * np.pi) +y4 = sympy_y(20 * np.pi) +print y4 +``` + + 100 loops, best of 3: 6.86 ms per loop + 0 + diff --git a/docs/04-scipy/04.07-ODEs.md b/docs/04-scipy/04.07-ODEs.md new file mode 100644 index 00000000..c411d933 --- /dev/null +++ b/docs/04-scipy/04.07-ODEs.md @@ -0,0 +1,126 @@ + +# 解微分方程 + + +```python +%pylab inline +``` + + Populating the interactive namespace from numpy and matplotlib + + +## 积分求解 + +### 简单的例子 + +$$\frac{dy}{dt} = sin(t)$$ + + +```python +def dy_dt(y, t): + return np.sin(t) +``` + +积分求解: + + +```python +from scipy.integrate import odeint + +t = np.linspace(0, 2*pi, 100) + +result = odeint(dy_dt, 0, t) +``` + + +```python +fig = figure(figsize=(12,4)) +p = plot(t, result, "rx", label=r"$\int_{0}^{x}sin(t) dt $") +p = plot(t, -cos(t) + cos(0), label=r"$cos(0) - cos(t)$") +p = plot(t, dy_dt(0, t), "g-", label=r"$\frac{dy}{dt}(t)$") +l = legend(loc="upper right") +xl = xlabel("t") +``` + + +![png](output_8_0.png) + + +### 高阶微分方程 + +抛物运动(竖直方向): + +$$ +\frac{d^2x}{dt^2} = g - \frac{D}{m}\frac{dx}{dt} +$$ + +改写成如下形式: + +$$y = \left[x, \frac{dx}{dt}\right] $$ + +$$\begin{aligned} +\frac{dy_0}{dt} &= y_1 \\\ +\frac{dy_1}{dt} &= g - \frac{D}{m} y_1 \\\ +\end{aligned} +$$ + + +```python
+def dy_dt(y, t): + """Governing equations for projectile motion with drag. + y[0] = position + y[1] = velocity + g = gravity (m/s2) + D = drag (1/s) = force/velocity + m = mass (kg) + """ + g = -9.8 + D = 0.1 + m = 0.15 + dy1 = g - (D/m) * y[1] + dy0 = y[1] if y[0] >= 0 else 0. + return [dy0, dy1] +``` + + +```python +position_0 = 0. +velocity_0 = 100 +t = linspace(0, 12, 100) +y = odeint(dy_dt, [position_0, velocity_0], t) +``` + + +```python +p = plot(t, y[:,0]) +yl = ylabel("Height (m)") +xl = xlabel("Time (s)") +``` + + +![png](output_13_0.png) + + + +```python +y, infodict = odeint(dy_dt, [position_0, velocity_0], t, full_output=True, printmessg=True, ) +print sorted(infodict.keys()) +print "cumulative number of function evaluations at each calculated point:", infodict['nfe'] +print "cumulative number of time steps", infodict['nst'] +``` + + Integration successful. + ['hu', 'imxer', 'leniw', 'lenrw', 'message', 'mused', 'nfe', 'nje', 'nqu', 'nst', 'tcur', 'tolsf', 'tsw'] + cumulative number of function evaluations at each calculated point: [ 45 49 51 53 55 59 61 61 63 65 67 67 69 71 73 73 75 77 + 77 79 79 81 81 83 85 85 87 87 89 89 91 91 93 95 95 97 + 97 99 99 101 101 103 103 105 107 107 109 109 111 111 113 113 115 115 + 117 117 119 119 121 121 123 123 123 125 125 127 127 129 129 131 131 131 + 133 133 135 135 135 137 137 139 139 139 141 141 143 143 143 145 145 147 + 147 149 149 149 154 158 274 280 280] + cumulative number of time steps [ 20 22 23 24 25 27 28 28 29 30 31 31 32 33 34 34 35 36 + 36 37 37 38 38 39 40 40 41 41 42 42 43 43 44 45 45 46 + 46 47 47 48 48 49 49 50 51 51 52 52 53 53 54 54 55 55 + 56 56 57 57 58 58 59 59 59 60 60 61 61 62 62 63 63 63 + 64 64 65 65 65 66 66 67 67 67 68 68 69 69 69 70 70 71 + 71 72 72 72 73 75 130 133 133] + diff --git a/docs/04-scipy/04.08-sparse-matrix.md b/docs/04-scipy/04.08-sparse-matrix.md new file mode 100644 index 00000000..6fee0c3b --- /dev/null +++ b/docs/04-scipy/04.08-sparse-matrix.md @@ -0,0 +1,295 @@ + +# 
稀疏矩阵 + +`Scipy` 提供了稀疏矩阵的支持(`scipy.sparse`)。 + +稀疏矩阵主要使用 位置 + 值 的方法来存储矩阵的非零元素,根据存储和使用方式的不同,有如下几种类型的稀疏矩阵: + +类型|描述 +---|---- +`bsr_matrix(arg1[, shape, dtype, copy, blocksize])` | Block Sparse Row matrix +`coo_matrix(arg1[, shape, dtype, copy])` | A sparse matrix in COOrdinate format. +`csc_matrix(arg1[, shape, dtype, copy])` | Compressed Sparse Column matrix +`csr_matrix(arg1[, shape, dtype, copy])` | Compressed Sparse Row matrix +`dia_matrix(arg1[, shape, dtype, copy])` | Sparse matrix with DIAgonal storage +`dok_matrix(arg1[, shape, dtype, copy])` | Dictionary Of Keys based sparse matrix. +`lil_matrix(arg1[, shape, dtype, copy])` | Row-based linked list sparse matrix + +在这些存储格式中: + +- COO 格式在构建矩阵时比较高效 +- CSC 和 CSR 格式在乘法计算时比较高效 + +## 构建稀疏矩阵 + + +```python +from scipy.sparse import * +import numpy as np +``` + +创建一个空的稀疏矩阵: + + +```python +coo_matrix((2,3)) +``` + + + + + <2x3 sparse matrix of type '' + with 0 stored elements in COOrdinate format> + + + +也可以使用一个已有的矩阵或数组或列表中创建新矩阵: + + +```python +A = coo_matrix([[1,2,0],[0,0,3],[4,0,5]]) +print A +``` + + (0, 0) 1 + (0, 1) 2 + (1, 2) 3 + (2, 0) 4 + (2, 2) 5 + + +不同格式的稀疏矩阵可以相互转化: + + +```python +type(A) +``` + + + + + scipy.sparse.coo.coo_matrix + + + + +```python +B = A.tocsr() +type(B) +``` + + + + + scipy.sparse.csr.csr_matrix + + + +可以转化为普通矩阵: + + +```python +C = A.todense() +C +``` + + + + + matrix([[1, 2, 0], + [0, 0, 3], + [4, 0, 5]]) + + + +与向量的乘法: + + +```python +v = np.array([1,0,-1]) +A.dot(v) +``` + + + + + array([ 1, -3, -1]) + + + +还可以传入一个 `(data, (row, col))` 的元组来构建稀疏矩阵: + + +```python +I = np.array([0,3,1,0]) +J = np.array([0,3,1,2]) +V = np.array([4,5,7,9]) +A = coo_matrix((V,(I,J)),shape=(4,4)) +``` + + +```python +print A +``` + + (0, 0) 4 + (3, 3) 5 + (1, 1) 7 + (0, 2) 9 + + +COO 格式的稀疏矩阵在构建的时候只是简单的将坐标和值加到后面,对于重复的坐标不进行处理: + + +```python +I = np.array([0,0,1,3,1,0,0]) +J = np.array([0,2,1,3,1,0,0]) +V = np.array([1,1,1,1,1,1,1]) +B = coo_matrix((V,(I,J)),shape=(4,4)) +print B +``` + + (0, 0) 1 + (0, 
2) 1 + (1, 1) 1 + (3, 3) 1 + (1, 1) 1 + (0, 0) 1 + (0, 0) 1 + + +转换成 CSR 格式会自动将相同坐标的值合并: + + +```python +C = B.tocsr() +print C +``` + + (0, 0) 3 + (0, 2) 1 + (1, 1) 2 + (3, 3) 1 + + +## 求解线性方程组 + + +```python +from scipy.sparse import lil_matrix +from scipy.sparse.linalg import spsolve +from numpy.linalg import solve, norm +from numpy.random import rand +``` + +构建 `1000 x 1000` 的稀疏矩阵: + + +```python +A = lil_matrix((1000, 1000)) +A[0, :100] = rand(100) +A[1, 100:200] = A[0, :100] +A.setdiag(rand(1000)) +``` + +转化为 CSR 之后,用 `spsolve` 求解 $Ax=b$: + + +```python +A = A.tocsr() +b = rand(1000) +x = spsolve(A, b) +``` + +转化成正常数组之后求解: + + +```python +x_ = solve(A.toarray(), b) +``` + +查看误差: + + +```python +err = norm(x-x_) +err +``` + + + + + 6.4310987107687431e-13 + + + +## sparse.find 函数 + +返回一个三元组,表示稀疏矩阵中非零元素的 `(row, col, value)`: + + +```python +from scipy import sparse + +row, col, val = sparse.find(C) +print row, col, val +``` + + [0 0 1 3] [0 2 1 3] [3 1 2 1] + + +## sparse.issparse 函数 + +查看一个对象是否为稀疏矩阵: + + +```python +sparse.issparse(B) +``` + + + + + True + + + +或者 + + +```python +sparse.isspmatrix(B.todense()) +``` + + + + + False + + + +还可以查询是否为指定格式的稀疏矩阵: + + +```python +sparse.isspmatrix_coo(B) +``` + + + + + True + + + + +```python +sparse.isspmatrix_csr(B) +``` + + + + + False + + diff --git a/docs/04-scipy/04.09-linear-algbra.md b/docs/04-scipy/04.09-linear-algbra.md new file mode 100644 index 00000000..b9b98945 --- /dev/null +++ b/docs/04-scipy/04.09-linear-algbra.md @@ -0,0 +1,777 @@ + +# 线性代数 + +`numpy` 和 `scipy` 中,负责进行线性代数部分计算的模块叫做 `linalg`。 + + +```python +import numpy as np +import numpy.linalg +import scipy as sp +import scipy.linalg +import matplotlib.pyplot as plt +from scipy import linalg + +%matplotlib inline +``` + +## numpy.linalg VS scipy.linalg + +一方面`scipy.linalg` 包含 `numpy.linalg` 中的所有函数,同时还包含了很多 `numpy.linalg` 中没有的函数。 + +另一方面,`scipy.linalg` 能够保证这些函数使用 BLAS/LAPACK 加速,而 `numpy.linalg` 中这些加速是可选的。 + +因此,在使用时,我们一般使用 `scipy.linalg` 而不是
`numpy.linalg`。 + +我们可以简单看看两个模块的差异: + + +```python +print "number of items in numpy.linalg:", len(dir(numpy.linalg)) +print "number of items in scipy.linalg:", len(dir(scipy.linalg)) +``` + + number of items in numpy.linalg: 36 + number of items in scipy.linalg: 115 + + +## numpy.matrix VS 2D numpy.ndarray + +线性代数的基本操作对象是矩阵,而矩阵的表示方法主要有两种:`numpy.matrix` 和 2D `numpy.ndarray`。 + +### numpy.matrix + +`numpy.matrix` 是一个矩阵类,提供了一些方便的矩阵操作: +- 支持类似 `MATLAB` 创建矩阵的语法 +- 矩阵乘法默认用 `*` 号 +- `.I` 表示逆,`.T` 表示转置 + +可以用 `mat` 或者 `matrix` 来产生矩阵: + + +```python +A = np.mat("[1, 2; 3, 4]") +print repr(A) + +A = np.matrix("[1, 2; 3, 4]") +print repr(A) +``` + + matrix([[1, 2], + [3, 4]]) + matrix([[1, 2], + [3, 4]]) + + +转置和逆: + + +```python +print repr(A.I) +print repr(A.T) +``` + + matrix([[-2. , 1. ], + [ 1.5, -0.5]]) + matrix([[1, 3], + [2, 4]]) + + +矩阵乘法: + + +```python +b = np.mat('[5; 6]') +print repr(A * b) +``` + + matrix([[17], + [39]]) + + +### 2 维 numpy.ndarray + +虽然 `numpy.matrix` 有着上面的好处,但是一般不建议使用,而是用 2 维 `numpy.ndarray` 对象替代,这样可以避免一些不必要的困惑。 + +我们可以使用 `array` 复现上面的操作: + + +```python +A = np.array([[1,2], [3,4]]) +print repr(A) +``` + + array([[1, 2], + [3, 4]]) + + +逆和转置: + + +```python +print repr(linalg.inv(A)) +print repr(A.T) +``` + + array([[-2. , 1. ], + [ 1.5, -0.5]]) + array([[1, 3], + [2, 4]]) + + +矩阵乘法: + + +```python +b = np.array([5, 6]) + +print repr(A.dot(b)) +``` + + array([17, 39]) + + +普通乘法: + + +```python +print repr(A * b) +``` + + array([[ 5, 12], + [15, 24]]) + + +`scipy.linalg` 的操作可以作用到两种类型的对象上,没有区别。 + +## 基本操作 + +### 求逆 + +矩阵 $\mathbf{A}$ 的逆 $\mathbf{B}$ 满足:$\mathbf{BA}=\mathbf{AB}=I$,记作 $\mathbf{B} = \mathbf{A}^{-1}$。 + +事实上,我们已经见过求逆的操作,`linalg.inv` 可以求一个可逆矩阵的逆: + + +```python +A = np.array([[1,2],[3,4]]) + +print linalg.inv(A) + +print A.dot(scipy.linalg.inv(A)) +``` + + [[-2. 1. 
] + [ 1.5 -0.5]] + [[ 1.00000000e+00 0.00000000e+00] + [ 8.88178420e-16 1.00000000e+00]] + + +### 求解线性方程组 + +例如,下列方程组 +$$ +\begin{eqnarray*} +x + 3y + 5z & = & 10 \\ +2x + 5y + z & = & 8 \\ +2x + 3y + 8z & = & 3 +\end{eqnarray*} +$$ +的解为: +$$ +\begin{split}\left[\begin{array}{c} x\\ y\\ z\end{array}\right]=\left[\begin{array}{ccc} 1 & 3 & 5\\ 2 & 5 & 1\\ 2 & 3 & 8\end{array}\right]^{-1}\left[\begin{array}{c} 10\\ 8\\ 3\end{array}\right]=\frac{1}{25}\left[\begin{array}{c} -232\\ 129\\ 19\end{array}\right]=\left[\begin{array}{c} -9.28\\ 5.16\\ 0.76\end{array}\right].\end{split} +$$ + +我们可以使用 `linalg.solve` 求解方程组,也可以先求逆再相乘,两者中 `solve` 比较快。 + + +```python +import time + +A = np.array([[1, 3, 5], + [2, 5, 1], + [2, 3, 8]]) +b = np.array([10, 8, 3]) + +tic = time.time() + +for i in xrange(1000): + x = linalg.inv(A).dot(b) + +print x +print A.dot(x)-b +print "inv and dot: {} s".format(time.time() - tic) + +tic = time.time() + +for i in xrange(1000): + x = linalg.solve(A, b) + +print x +print A.dot(x)-b +print "solve: {} s".format(time.time() - tic) +``` + + [-9.28 5.16 0.76] + [ 0.00000000e+00 -1.77635684e-15 -8.88178420e-16] + inv and dot: 0.0353579521179 s + [-9.28 5.16 0.76] + [ 0.00000000e+00 -1.77635684e-15 -1.77635684e-15] + solve: 0.0284671783447 s + + +### 计算行列式 + +方阵的行列式为 +$$ +\left|\mathbf{A}\right|=\sum_{j}\left(-1\right)^{i+j}a_{ij}M_{ij}. 
+$$ + +其中 $a_{ij}$ 表示 $\mathbf{A}$ 的第 $i$ 行 第 $j$ 列的元素,$M_{ij}$ 表示矩阵 $\mathbf{A}$ 去掉第 $i$ 行 第 $j$ 列的新矩阵的行列式。 + +例如,矩阵 +$$ +\begin{split}\mathbf{A=}\left[\begin{array}{ccc} 1 & 3 & 5\\ 2 & 5 & 1\\ 2 & 3 & 8\end{array}\right]\end{split} +$$ +的行列式是: +$$ +\begin{eqnarray*} \left|\mathbf{A}\right| & = & 1\left|\begin{array}{cc} 5 & 1\\ 3 & 8\end{array}\right|-3\left|\begin{array}{cc} 2 & 1\\ 2 & 8\end{array}\right|+5\left|\begin{array}{cc} 2 & 5\\ 2 & 3\end{array}\right|\\ & = & 1\left(5\cdot8-3\cdot1\right)-3\left(2\cdot8-2\cdot1\right)+5\left(2\cdot3-2\cdot5\right)=-25.\end{eqnarray*} +$$ + +可以用 `linalg.det` 计算行列式: + + +```python +A = np.array([[1, 3, 5], + [2, 5, 1], + [2, 3, 8]]) + +print linalg.det(A) +``` + + -25.0 + + +### 计算矩阵或向量的模 + +矩阵的模定义如下: +$$ +\begin{split}\left\Vert \mathbf{A}\right\Vert =\left\{ \begin{array}{cc} \max_{i}\sum_{j}\left|a_{ij}\right| & \textrm{ord}=\textrm{inf}\\ \min_{i}\sum_{j}\left|a_{ij}\right| & \textrm{ord}=-\textrm{inf}\\ \max_{j}\sum_{i}\left|a_{ij}\right| & \textrm{ord}=1\\ \min_{j}\sum_{i}\left|a_{ij}\right| & \textrm{ord}=-1\\ \max\sigma_{i} & \textrm{ord}=2\\ \min\sigma_{i} & \textrm{ord}=-2\\ \sqrt{\textrm{trace}\left(\mathbf{A}^{H}\mathbf{A}\right)} & \textrm{ord}=\textrm{'fro'}\end{array}\right.\end{split} +$$ +其中,$\sigma_i$ 是矩阵的奇异值。 + +向量的模定义如下: +$$ +\begin{split}\left\Vert \mathbf{x}\right\Vert =\left\{ \begin{array}{cc} \max\left|x_{i}\right| & \textrm{ord}=\textrm{inf}\\ \min\left|x_{i}\right| & \textrm{ord}=-\textrm{inf}\\ \left(\sum_{i}\left|x_{i}\right|^{\textrm{ord}}\right)^{1/\textrm{ord}} & \left|\textrm{ord}\right|<\infty.\end{array}\right.\end{split} +$$ + +`linalg.norm` 可以计算向量或者矩阵的模: + + +```python +A = np.array([[1, 2], + [3, 4]]) + +print linalg.norm(A) + +print linalg.norm(A,'fro') # frobenius norm 默认值 + +print linalg.norm(A,1) # L1 norm 最大列和 + +print linalg.norm(A,-1) # L -1 norm 最小列和 + +print linalg.norm(A,np.inf) # L inf norm 最大行和 +``` + + 5.47722557505 + 5.47722557505 + 6 + 4 + 7 + + +### 最小二乘解和伪逆 + +#### 
问题描述 + +所谓最小二乘问题的定义如下: + +假设 $y_i$ 与 $\mathbf{x_i}$ 的关系可以用一组系数 $c_j$ 和对应的模型函数 $f_j(\mathbf{x_i})$ 的模型表示: + +$$ +y_{i}=\sum_{j}c_{j}f_{j}\left(\mathbf{x}_{i}\right)+\epsilon_{i} +$$ + +其中 $\epsilon_i$ 表示数据的不确定性。最小二乘就是要优化这样一个关于 $c_j$ 的问题: +$$ +J\left(\mathbf{c}\right)=\sum_{i}\left|y_{i}-\sum_{j}c_{j}f_{j}\left(x_{i}\right)\right|^{2} +$$ + +其理论解满足: +$$ +\frac{\partial J}{\partial c_{n}^{*}}=0=\sum_{i}\left(y_{i}-\sum_{j}c_{j}f_{j}\left(x_{i}\right)\right)\left(-f_{n}^{*}\left(x_{i}\right)\right) +$$ + +改写为: +$$ +\begin{eqnarray*} \sum_{j}c_{j}\sum_{i}f_{j}\left(x_{i}\right)f_{n}^{*}\left(x_{i}\right) & = & \sum_{i}y_{i}f_{n}^{*}\left(x_{i}\right)\\ \mathbf{A}^{H}\mathbf{Ac} & = & \mathbf{A}^{H}\mathbf{y}\end{eqnarray*} +$$ + +其中: +$$ +\left\{ \mathbf{A}\right\} _{ij}=f_{j}\left(x_{i}\right). +$$ + +当 $\mathbf{A^HA}$ 可逆时,我们有: +$$ +\mathbf{c}=\left(\mathbf{A}^{H}\mathbf{A}\right)^{-1}\mathbf{A}^{H}\mathbf{y}=\mathbf{A}^{\dagger}\mathbf{y} +$$ + +矩阵 $\mathbf{A}^{\dagger}$ 叫做 $\mathbf{A}$ 的伪逆。 + +#### 问题求解 + +注意到,我们的模型可以写为: +$$ +\mathbf{y}=\mathbf{Ac}+\boldsymbol{\epsilon}. +$$ + +在给定 $\mathbf{y}$ 和 $\mathbf{A}$ 的情况下,我们可以使用 `linalg.lstsq` 求解 $\mathbf c$。 + +在给定 $\mathbf{A}$ 的情况下,我们可以使用 `linalg.pinv` 或者 `linalg.pinv2` 求解 $\mathbf{A}^{\dagger}$。 + +#### 例子 + +假设我们的数据满足: +$$ +\begin{align} +y_{i} & =c_{1}e^{-x_{i}}+c_{2}x_{i} \\ +z_{i} & = y_i + \epsilon_i +\end{align} +$$ + +其中 $x_i = \frac{i}{10},\ i = 1,\dots,10$,$c_1 = 5, c_2 = 2$,产生数据 + + +```python +c1, c2 = 5.0, 2.0 +i = np.r_[1:11] +xi = 0.1*i +yi = c1*np.exp(-xi) + c2*xi +zi = yi + 0.05 * np.max(yi) * np.random.randn(len(yi)) +``` + +构造矩阵 $\mathbf A$: + + +```python +A = np.c_[np.exp(-xi)[:, np.newaxis], xi[:, np.newaxis]] +print A +``` + + [[ 0.90483742 0.1 ] + [ 0.81873075 0.2 ] + [ 0.74081822 0.3 ] + [ 0.67032005 0.4 ] + [ 0.60653066 0.5 ] + [ 0.54881164 0.6 ] + [ 0.4965853 0.7 ] + [ 0.44932896 0.8 ] + [ 0.40656966 0.9 ] + [ 0.36787944 1. 
]] + + +求解最小二乘问题: + + +```python +c, resid, rank, sigma = linalg.lstsq(A, zi) + +print c +``` + + [ 4.87016856 2.19081311] + + +其中 `c` 为最小二乘解(维数与 `zi` 一致),`resid` 为 `zi - A c` 每一列差值的二范数的平方,`rank` 为矩阵 `A` 的秩,`sigma` 为矩阵 `A` 的奇异值。 + +查看拟合效果: + + +```python +xi2 = np.r_[0.1:1.0:100j] +yi2 = c[0]*np.exp(-xi2) + c[1]*xi2 + +plt.plot(xi,zi,'x',xi2,yi2) +plt.axis([0,1.1,3.0,5.5]) +plt.xlabel('$x_i$') +plt.title('Data fitting with linalg.lstsq') +plt.show() +``` + + +![png](output_51_0.png) + + +### 广义逆 + +`linalg.pinv` 或 `linalg.pinv2` 可以用来求广义逆,其区别在于前者使用求最小二乘解的算法,后者使用求奇异值的算法求解。 + +## 矩阵分解 + +### 特征值和特征向量 + +#### 问题描述 + +对于给定的 $N \times N$ 矩阵 $\mathbf A$,特征值和特征向量问题相当于寻找标量 $\lambda$ 和对应的向量 $\mathbf v$ 使得: +$$ +\mathbf{Av} = \lambda \mathbf{v} +$$ + +矩阵的 $N$ 个特征值(可能相同)可以通过计算特征方程的根得到: +$$ +\left|\mathbf{A} - \lambda \mathbf{I}\right| = 0 +$$ + +然后利用这些特征值求(归一化的)特征向量。 + +#### 问题求解 + +- `linalg.eig(A)` + - 返回矩阵的特征值与特征向量 +- `linalg.eigvals(A)` + - 返回矩阵的特征值 +- `linalg.eig(A, B)` + - 求解 $\mathbf{Av} = \lambda\mathbf{Bv}$ 的问题 + +#### 例子 + +矩阵为 +$$ +\begin{split}\mathbf{A}=\left[\begin{array}{ccc} 1 & 5 & 2\\ 2 & 4 & 1\\ 3 & 6 & 2\end{array}\right].\end{split} +$$ + +特征多项式为: +$$ +\begin{eqnarray*} \left|\mathbf{A}-\lambda\mathbf{I}\right| & = & \left(1-\lambda\right)\left[\left(4-\lambda\right)\left(2-\lambda\right)-6\right]-\\ & & 5\left[2\left(2-\lambda\right)-3\right]+2\left[12-3\left(4-\lambda\right)\right]\\ & = & -\lambda^{3}+7\lambda^{2}+8\lambda-3.\end{eqnarray*} +$$ + +特征根为: +$$ +\begin{eqnarray*} \lambda_{1} & = & 7.9579\\ \lambda_{2} & = & -1.2577\\ \lambda_{3} & = & 0.2997.\end{eqnarray*} +$$ + + +```python +A = np.array([[1, 5, 2], + [2, 4, 1], + [3, 6, 2]]) + +la, v = linalg.eig(A) + +print la + + +# 验证是否归一化 +print np.sum(abs(v**2),axis=0) + +# 第一个特征值 +l1 = la[0] +# 对应的特征向量 +v1 = v[:, 0].T + +# 验证是否为特征值和特征向量对 +print linalg.norm(A.dot(v1)-l1*v1) +``` + + [ 7.95791620+0.j -1.25766471+0.j 0.29974850+0.j] + [ 1. 1. 1.]
+ 3.23301824835e-15 + + +### 奇异值分解 + +#### 问题描述 + +$M \times N$ 矩阵 $\mathbf A$ 的奇异值分解为: +$$ +\mathbf{A=U}\boldsymbol{\Sigma}\mathbf{V}^{H} +$$ + +其中 $\boldsymbol{\Sigma}, (M \times N)$ 只有对角线上的元素不为 0,$\mathbf U, (M \times M)$ 和 $\mathbf V, (N \times N)$ 为正交矩阵。 + +其具体原理可以查看维基百科: +https://en.wikipedia.org/wiki/Singular_value_decomposition + +#### 问题求解 + +- `U,s,Vh = linalg.svd(A)` + - 返回 $U$ 矩阵,奇异值 $s$,$V^H$ 矩阵 +- `Sig = linalg.diagsvd(s,M,N)` + - 从奇异值恢复 $\boldsymbol{\Sigma}$ 矩阵 + +#### 例子 + +奇异值分解: + + +```python +A = np.array([[1,2,3],[4,5,6]]) + +U, s, Vh = linalg.svd(A) +``` + +$\boldsymbol{\Sigma}$ 矩阵: + + +```python +M, N = A.shape +Sig = linalg.diagsvd(s,M,N) + +print Sig +``` + + [[ 9.508032 0. 0. ] + [ 0. 0.77286964 0. ]] + + +检查正确性: + + +```python +print A +print U.dot(Sig.dot(Vh)) +``` + + [[1 2 3] + [4 5 6]] + [[ 1. 2. 3.] + [ 4. 5. 6.]] + + +### LU 分解 + +$M \times N$ 矩阵 $\mathbf A$ 的 `LU` 分解为: +$$ +\mathbf{A}=\mathbf{P}\,\mathbf{L}\,\mathbf{U} +$$ + +$\mathbf P$ 是 $M \times M$ 的单位矩阵的一个排列,$\mathbf L$ 是下三角阵,$\mathbf U$ 是上三角阵。 + +可以使用 `linalg.lu` 进行 LU 分解的求解: + +具体原理可以查看维基百科: +https://en.wikipedia.org/wiki/LU_decomposition + + +```python +A = np.array([[1,2,3],[4,5,6]]) + +P, L, U = linalg.lu(A) + +print P +print L +print U + +print P.dot(L).dot(U) +``` + + [[ 0. 1.] + [ 1. 0.]] + [[ 1. 0. ] + [ 0.25 1. ]] + [[ 4. 5. 6. ] + [ 0. 0.75 1.5 ]] + [[ 1. 2. 3.] + [ 4. 5. 6.]] + + +### Cholesky 分解 + +`Cholesky` 分解是一种特殊的 `LU` 分解,此时要求 $\mathbf A$ 为 Hermitian 正定矩阵 ($\mathbf A = \mathbf{A^H}$)。 + +此时有: +$$ +\begin{eqnarray*} \mathbf{A} & = & \mathbf{U}^{H}\mathbf{U}\\ \mathbf{A} & = & \mathbf{L}\mathbf{L}^{H}\end{eqnarray*} +$$ +即 +$$ +\mathbf{L}=\mathbf{U}^{H}. 
+$$ + +可以用 `linalg.cholesky` 求解。 + +### QR 分解 + +$M \times N$ 矩阵 $\mathbf A$ 的 `QR` 分解为: +$$ +\mathbf{A=QR} +$$ + +$\mathbf R$ 为上三角形矩阵,$\mathbf Q$ 是正交矩阵。 + +维基链接: +https://en.wikipedia.org/wiki/QR_decomposition + +可以用 `linalg.qr` 求解。 + +### Schur 分解 + +对于 $N\times N$ 方阵 $\mathbf A$, `Schur` 分解要求找到满足下式的矩阵: +$$ +\mathbf{A=ZTZ^H} +$$ + +其中 $\mathbf Z$ 是正交矩阵,$\mathbf T$ 是一个上三角矩阵。 + +维基链接: +https://en.wikipedia.org/wiki/Schur_decomposition + + +```python +A = np.mat('[1 3 2; 1 4 5; 2 3 6]') + +print A + +T, Z = linalg.schur(A) + +print T, Z + +print Z.dot(T).dot(Z.T) +``` + + [[1 3 2] + [1 4 5] + [2 3 6]] + [[ 9.90012467 1.78947961 -0.65498528] + [ 0. 0.54993766 -1.57754789] + [ 0. 0.51260928 0.54993766]] [[ 0.36702395 -0.85002495 -0.37782404] + [ 0.63681656 -0.06646488 0.76814522] + [ 0.67805463 0.52253231 -0.51691576]] + [[ 1. 3. 2.] + [ 1. 4. 5.] + [ 2. 3. 6.]] + + +## 矩阵函数 + +考虑函数 $f(x)$ 的泰勒展开: +$$ +f\left(x\right)=\sum_{k=0}^{\infty}\frac{f^{\left(k\right)}\left(0\right)}{k!}x^{k} +$$ + +对于方阵,矩阵函数可以定义如下: +$$ +f\left(\mathbf{A}\right)=\sum_{k=0}^{\infty}\frac{f^{\left(k\right)}\left(0\right)}{k!}\mathbf{A}^{k} +$$ + +这个定义便于表示矩阵函数,但通常并不是计算矩阵函数的最好方式。 + +### 指数和对数函数 + +#### 指数 + +指数可以定义如下: +$$ +e^{\mathbf{A}}=\sum_{k=0}^{\infty}\frac{1}{k!}\mathbf{A}^{k} +$$ + +`linalg.expm3` 使用的是泰勒展开的方法计算结果: + + +```python +A = np.array([[1, 2], [3, 4]]) + +print linalg.expm3(A) +``` + + [[ 51.96890355 74.73648784] + [ 112.10473176 164.07363531]] + + +另一种方法先计算 A 的特征值分解: +$$ +\mathbf{A}=\mathbf{V}\boldsymbol{\Lambda}\mathbf{V}^{-1} +$$ + +然后有(利用相似变换和对角阵的性质): +$$ +e^{\mathbf{A}}=\mathbf{V}e^{\boldsymbol{\Lambda}}\mathbf{V}^{-1} +$$ + +`linalg.expm2` 使用的就是这种方法: + + +```python +print linalg.expm2(A) +``` + + [[ 51.9689562 74.73656457] + [ 112.10484685 164.07380305]] + + +最优的方法是用 [`Padé` 近似](https://en.wikipedia.org/wiki/Pad%C3%A9_approximant) 实现,`Padé` 近似往往比截断的泰勒级数准确,而且当泰勒级数不收敛时,`Padé` 近似往往仍可行,所以多用于计算机数学中。 + +`linalg.expm` 使用的就是这种方法: + + +```python +print linalg.expm(A) +``` + + [[ 51.9689562 74.73656457] + [
112.10484685 164.07380305]] + + +#### 对数 + +指数的逆运算,可以用 `linalg.logm` 实现: + + +```python +print A +print linalg.logm(linalg.expm(A)) +``` + + [[1 2] + [3 4]] + [[ 1. 2.] + [ 3. 4.]] + + +### 三角函数 + +根据欧拉公式,其定义为: +$$ +\begin{eqnarray*} \sin\left(\mathbf{A}\right) & = & \frac{e^{j\mathbf{A}}-e^{-j\mathbf{A}}}{2j}\\ \cos\left(\mathbf{A}\right) & = & \frac{e^{j\mathbf{A}}+e^{-j\mathbf{A}}}{2}.\end{eqnarray*} +$$ + +正切函数定义为: +$$ +\tan\left(x\right)=\frac{\sin\left(x\right)}{\cos\left(x\right)}=\left[\cos\left(x\right)\right]^{-1}\sin\left(x\right) +$$ + +因此矩阵的正切函数定义为: +$$ +\left[\cos\left(\mathbf{A}\right)\right]^{-1}\sin\left(\mathbf{A}\right). +$$ + +具体实现: +- `linalg.sinm` +- `linalg.cosm` +- `linalg.tanm` + +### 双曲三角函数 + +\begin{eqnarray*} \sinh\left(\mathbf{A}\right) & = & \frac{e^{\mathbf{A}}-e^{-\mathbf{A}}}{2}\\ \cosh\left(\mathbf{A}\right) & = & \frac{e^{\mathbf{A}}+e^{-\mathbf{A}}}{2}\\ \tanh\left(\mathbf{A}\right) & = & \left[\cosh\left(\mathbf{A}\right)\right]^{-1}\sinh\left(\mathbf{A}\right).\end{eqnarray*} + +具体实现: +- `linalg.sinhm` +- `linalg.coshm` +- `linalg.tanhm` + +## 特殊矩阵 + +`Scipy` 提供了一些特殊矩阵的实现,具体可以参考: + +http://docs.scipy.org/doc/scipy/reference/tutorial/linalg.html#special-matrices diff --git a/docs/04-scipy/04.10-sparse-linear-algebra.md b/docs/04-scipy/04.10-sparse-linear-algebra.md new file mode 100644 index 00000000..bca451b0 --- /dev/null +++ b/docs/04-scipy/04.10-sparse-linear-algebra.md @@ -0,0 +1,39 @@ + +# 稀疏矩阵的线性代数 + +对于稀疏矩阵来说,其线性代数操作可以使用 `scipy.sparse.linalg` 实现: + + +```python +import scipy.sparse.linalg +``` + +## 矩阵操作 + +- `scipy.sparse.linalg.inv` + - 稀疏矩阵求逆 +- `scipy.sparse.linalg.expm` + - 求稀疏矩阵的指数函数 + +## 矩阵范数 + +- `scipy.sparse.linalg.norm` + - 稀疏矩阵求范数 + +## 线性方程组求解 + +提供了一系列求解方法: +http://docs.scipy.org/doc/scipy/reference/sparse.linalg.html#solving-linear-problems + +主要使用的是迭代方法求解。 + +## 特征值分解和奇异值分解 + +对于特别大的矩阵,原来的方法可能需要太大的内存,考虑使用这两个方法替代: + +- `scipy.sparse.linalg.eigs` + - 返回前 k 大的特征值和特征向量 +- `scipy.sparse.linalg.svds` + - 返回前 k 
大的奇异值和奇异向量 + +## 所有的这些操作既可以在稀疏矩阵上使用,也可以在普通矩阵上使用。 diff --git a/docs/05-advanced-python/05.01-overview-of-the-sys-module.md b/docs/05-advanced-python/05.01-overview-of-the-sys-module.md new file mode 100644 index 00000000..728029d2 --- /dev/null +++ b/docs/05-advanced-python/05.01-overview-of-the-sys-module.md @@ -0,0 +1,162 @@ + +# sys 模块简介 + + +```python +import sys +``` + +## 命令行参数 + +`sys.argv` 显示传入的参数: + + +```python +%%writefile print_args.py +import sys +print sys.argv +``` + + Writing print_args.py + + +运行这个程序: + + +```python +%run print_args.py 1 foo +``` + + ['print_args.py', '1', 'foo'] + + +第一个参数 (`sys.argv[0]`) 表示的始终是执行的文件名,然后依次显示传入的参数。 + +删除刚才生成的文件: + + +```python +import os +os.remove('print_args.py') +``` + +## 异常消息 + +`sys.exc_info()` 可以显示 `Exception` 的信息,返回一个 `(type, value, traceback)` 组成的三元组,可以与 `try/except` 块一起使用: + + +```python +try: + x = 1/0 +except Exception: + print sys.exc_info() +``` + + (, ZeroDivisionError('integer division or modulo by zero',), ) + + +`sys.exc_clear()` 用于清除所有的异常消息。 + +## 标准输入输出流 + +- sys.stdin +- sys.stdout +- sys.stderr + +## 退出Python + +`sys.exit(arg=0)` 用于退出 Python。`0` 或者 `None` 表示正常退出,其他值表示异常。 + +## Python Path + +`sys.path` 表示 Python 搜索模块的路径和查找顺序: + + +```python +sys.path +``` + + + + + ['', + 'C:\\Anaconda\\python27.zip', + 'C:\\Anaconda\\DLLs', + 'C:\\Anaconda\\lib', + 'C:\\Anaconda\\lib\\plat-win', + 'C:\\Anaconda\\lib\\lib-tk', + 'C:\\Anaconda', + 'C:\\Anaconda\\lib\\site-packages', + 'C:\\Anaconda\\lib\\site-packages\\Sphinx-1.3.1-py2.7.egg', + 'C:\\Anaconda\\lib\\site-packages\\cryptography-0.9.1-py2.7-win-amd64.egg', + 'C:\\Anaconda\\lib\\site-packages\\win32', + 'C:\\Anaconda\\lib\\site-packages\\win32\\lib', + 'C:\\Anaconda\\lib\\site-packages\\Pythonwin', + 'C:\\Anaconda\\lib\\site-packages\\setuptools-17.1.1-py2.7.egg', + 'C:\\Anaconda\\lib\\site-packages\\IPython\\extensions'] + + + +在程序中可以修改,添加新的路径。 + +## 操作系统信息 + +`sys.platform` 显示当前操作系统信息: + +- `Windows: win32` +- `Mac OSX: darwin` +- `Linux: linux2` +
+ +```python +sys.platform +``` + + + + + 'win32' + + + +返回 `Windows` 操作系统的版本: + + +```python +sys.getwindowsversion() +``` + + + + + sys.getwindowsversion(major=6, minor=2, build=9200, platform=2, service_pack='') + + + +标准库中有 `platform` 模块提供更详细的信息。 + +## Python 版本信息 + + +```python +sys.version +``` + + + + + '2.7.10 |Anaconda 2.3.0 (64-bit)| (default, May 28 2015, 16:44:52) [MSC v.1500 64 bit (AMD64)]' + + + + +```python +sys.version_info +``` + + + + + sys.version_info(major=2, minor=7, micro=10, releaselevel='final', serial=0) + + diff --git a/docs/05-advanced-python/05.02-interacting-with-the-OS---os.md b/docs/05-advanced-python/05.02-interacting-with-the-OS---os.md new file mode 100644 index 00000000..b358ded2 --- /dev/null +++ b/docs/05-advanced-python/05.02-interacting-with-the-OS---os.md @@ -0,0 +1,207 @@ + +# 与操作系统进行交互:os 模块 + +`os` 模块提供了对系统文件进行操作的方法: + + +```python +import os +``` + +## 文件路径操作 + +- `os.remove(path)` 或 `os.unlink(path)` :删除指定路径的文件。路径可以是全名,也可以是当前工作目录下的路径。 +- `os.removedirs`:删除指定的空文件夹,并依次删除中间路径中的空文件夹 +- `os.chdir(path)`:将当前工作目录改变为指定的路径 +- `os.getcwd()`:返回当前的工作目录 +- `os.curdir`:表示当前目录的符号 +- `os.rename(old, new)`:重命名文件 +- `os.renames(old, new)`:重命名文件,如果中间路径的文件夹不存在,则创建文件夹 +- `os.listdir(path)`:返回给定目录下的所有文件夹和文件名,不包括 `'.'` 和 `'..'` 以及子文件夹下的目录。(`'.'` 和 `'..'` 分别指当前目录和父目录) +- `os.mkdir(name)`:产生新文件夹 +- `os.makedirs(name)`:产生新文件夹,如果中间路径的文件夹不存在,则创建文件夹 + +当前目录: + + +```python +os.getcwd() +``` + + + + + '/home/lijin/notes-python/05. advanced python' + + + +当前目录的符号: + + +```python +os.curdir +``` + + + + + '.'
+ + + +当前目录下的文件: + + +```python +os.listdir(os.curdir) +``` + + + + + ['05.01 overview of the sys module.ipynb', + '05.05 datetime.ipynb', + '05.13 decorator usage.ipynb', + '.ipynb_checkpoints', + '05.03 comma separated values.ipynb', + '05.02 interacting with the OS - os.ipynb', + '05.10 generators.ipynb', + '05.15 scope.ipynb', + '05.12 decorators.ipynb', + '05.09 iterators.ipynb', + 'my_database.sqlite', + '05.11 context managers and the with statement.ipynb', + '05.16 dynamic code execution.ipynb', + '05.14 the operator functools itertools toolz fn funcy module.ipynb', + '05.04 regular expression.ipynb', + '05.07 object-relational mappers.ipynb', + '05.08 functions.ipynb', + '05.06 sql databases.ipynb'] + + + +产生文件: + + +```python +f = open("test.file", "w") +f.close() + +print "test.file" in os.listdir(os.curdir) +``` + + True + + +重命名文件: + + +```python +os.rename("test.file", "test.new.file") + +print "test.file" in os.listdir(os.curdir) +print "test.new.file" in os.listdir(os.curdir) +``` + + False + True + + +删除文件: + + +```python +os.remove("test.new.file") +``` + +## 系统常量 + +当前操作系统的换行符: + + +```python +# windows 为 \r\n +os.linesep +``` + + + + + '\n' + + + +当前操作系统的路径分隔符: + + +```python +os.sep +``` + + + + + '/' + + + +当前操作系统的环境变量中的分隔符(`';'` 或 `':'`): + + +```python +os.pathsep +``` + + + + + ':' + + + +## 其他 + +`os.environ` 是一个存储所有环境变量的值的字典,可以修改。 + + +```python +os.environ["USER"] +``` + + + + + 'lijin' + + + +`os.urandom(len)` 返回指定长度的随机字节。 + +## os.path 模块 + +不同的操作系统使用不同的路径规范,这样当我们在不同的操作系统下进行操作时,可能会带来一定的麻烦,而 `os.path` 模块则帮我们解决了这个问题。 + + +```python +import os.path +``` + +### 测试 + +- `os.path.isfile(path)` :检测一个路径是否为普通文件 +- `os.path.isdir(path)`:检测一个路径是否为文件夹 +- `os.path.exists(path)`:检测路径是否存在 +- `os.path.isabs(path)`:检测路径是否为绝对路径 + +### split 和 join + +- `os.path.split(path)`:拆分一个路径为 `(head, tail)` 两部分 +- `os.path.join(a, *p)`:使用系统的路径分隔符,将各个部分合成一个路径 + +### 其他 + +- `os.path.abspath()`:返回路径的绝对路径 +- `os.path.dirname(path)`:返回路径中的文件夹部分 +- 
`os.path.basename(path)`:返回路径中的文件部分 +- `os.path.splitext(path)`:将路径与扩展名分开 +- `os.path.expanduser(path)`:展开 `'~'` 和 `'~user'` diff --git a/docs/05-advanced-python/05.03-comma-separated-values.md b/docs/05-advanced-python/05.03-comma-separated-values.md new file mode 100644 index 00000000..e5caee19 --- /dev/null +++ b/docs/05-advanced-python/05.03-comma-separated-values.md @@ -0,0 +1,205 @@ + +# CSV 文件和 csv 模块 + +标准库中有自带的 `csv` (逗号分隔值) 模块处理 `csv` 格式的文件: + + +```python +import csv +``` + +## 读 csv 文件 + +假设我们有这样的一个文件: + + +```python +%%file data.csv +"alpha 1", 100, -1.443 +"beat 3", 12, -0.0934 +"gamma 3a", 192, -0.6621 +"delta 2a", 15, -4.515 +``` + + Writing data.csv + + +打开这个文件,并产生一个文件 reader: + + +```python +fp = open("data.csv") +r = csv.reader(fp) +``` + +可以按行迭代数据: + + +```python +for row in r: + print row + +fp.close() +``` + + ['alpha 1', ' 100', ' -1.443'] + ['beat 3', ' 12', ' -0.0934'] + ['gamma 3a', ' 192', ' -0.6621'] + ['delta 2a', ' 15', ' -4.515'] + + +默认数据内容都被当作字符串处理,不过可以自己进行处理: + + +```python +data = [] + +with open('data.csv') as fp: + r = csv.reader(fp) + for row in r: + data.append([row[0], int(row[1]), float(row[2])]) + +data +``` + + + + + [['alpha 1', 100, -1.443], + ['beat 3', 12, -0.0934], + ['gamma 3a', 192, -0.6621], + ['delta 2a', 15, -4.515]] + + + + +```python +import os +os.remove('data.csv') +``` + +## 写 csv 文件 + +可以使用 `csv.writer` 写入文件,不过相应地,传入的应该是以写方式打开的文件,不过一般要用 `'wb'` 即二进制写入方式,防止出现换行不正确的问题: + + +```python +data = [('one', 1, 1.5), ('two', 2, 8.0)] +with open('out.csv', 'wb') as fp: + w = csv.writer(fp) + w.writerows(data) +``` + +显示结果: + + +```python +!cat 'out.csv' +``` + + one,1,1.5 + two,2,8.0 + + +## 更换分隔符 + +默认情况下,`csv` 模块默认 `csv` 文件都是由 `excel` 产生的,实际中可能会遇到这样的问题: + + +```python +data = [('one, \"real\" string', 1, 1.5), ('two', 2, 8.0)] +with open('out.csv', 'wb') as fp: + w = csv.writer(fp) + w.writerows(data) +``` + + +```python +!cat 'out.csv' +``` + + "one, ""real"" string",1,1.5 + two,2,8.0 + + +可以修改分隔符来处理这组数据: + + 
+```python +data = [('one, \"real\" string', 1, 1.5), ('two', 2, 8.0)] +with open('out.psv', 'wb') as fp: + w = csv.writer(fp, delimiter="|") + w.writerows(data) +``` + + +```python +!cat 'out.psv' +``` + + "one, ""real"" string"|1|1.5 + two|2|8.0 + + + +```python +import os +os.remove('out.psv') +os.remove('out.csv') +``` + +## 其他选项 + +`numpy.loadtxt()` 和 `pandas.read_csv()` 可以用来读写包含很多数值数据的 `csv` 文件: + + +```python +%%file trades.csv +Order,Date,Stock,Quantity,Price +A0001,2013-12-01,AAPL,1000,203.4 +A0002,2013-12-01,MSFT,1500,167.5 +A0003,2013-12-02,GOOG,1500,167.5 +``` + + Writing trades.csv + + +使用 `pandas` 进行处理,生成一个 `DataFrame` 对象: + + +```python +import pandas +df = pandas.read_csv('trades.csv', index_col=0) +print df +``` + + Date Stock Quantity Price + Order + A0001 2013-12-01 AAPL 1000 203.4 + A0002 2013-12-01 MSFT 1500 167.5 + A0003 2013-12-02 GOOG 1500 167.5 + + +通过名字进行索引: + + +```python +df['Quantity'] * df['Price'] +``` + + + + + Order + A0001 203400 + A0002 251250 + A0003 251250 + dtype: float64 + + + + +```python +import os +os.remove('trades.csv') +``` diff --git a/docs/05-advanced-python/05.04-regular-expression.md b/docs/05-advanced-python/05.04-regular-expression.md new file mode 100644 index 00000000..fb977e32 --- /dev/null +++ b/docs/05-advanced-python/05.04-regular-expression.md @@ -0,0 +1,220 @@ + +# 正则表达式和 re 模块 + +## 正则表达式 + +[正则表达式](http://baike.baidu.com/view/94238.htm)是用来匹配字符串或者子串的一种模式,匹配的字符串可以很具体,也可以很一般化。 + +`Python` 标准库提供了 `re` 模块。 + + +```python +import re +``` + +## re.match & re.search + +在 `re` 模块中, `re.match` 和 `re.search` 是常用的两个方法: + + re.match(pattern, string[, flags]) + re.search(pattern, string[, flags]) + +两者都寻找第一个匹配成功的部分,成功则返回一个 `match` 对象,不成功则返回 `None`,不同之处在于 `re.match` 只匹配字符串的开头部分,而 `re.search` 匹配的则是整个字符串中的子串。 + +## re.findall & re.finditer + +`re.findall(pattern, string)` 返回所有匹配的对象, `re.finditer` 则返回一个迭代器。 + +## re.split + +`re.split(pattern, string[, maxsplit])` 按照 `pattern` 指定的内容对字符串进行分割。 + +## re.sub + 
+`re.sub(pattern, repl, string[, count])` 将 `pattern` 匹配的内容进行替换。 + +## re.compile + +`re.compile(pattern)` 生成一个 `pattern` 对象,这个对象有匹配,替换,分割字符串的方法。 + +## 正则表达式规则 + +正则表达式由一些普通字符和一些元字符(metacharacters)组成。普通字符包括大小写的字母和数字,而元字符则具有特殊的含义: + +子表达式|匹配内容 +---|--- +`.`| 匹配除了换行符之外的内容 +`\w` | 匹配所有字母和数字字符 +`\d` | 匹配所有数字,相当于 `[0-9]` +`\s` | 匹配空白,相当于 `[ \t\n\r\f\v]` +`\W,\D,\S`| 匹配对应小写字母形式的补 +`[...]` | 表示可以匹配的集合,支持范围表示如 `a-z`, `0-9` 等 +`(...)` | 表示作为一个整体进行匹配 +¦ | 表示逻辑或 +`^` | 表示匹配后面的子表达式的补 +`*` | 表示匹配前面的子表达式 0 次或更多次 +`+` | 表示匹配前面的子表达式 1 次或更多次 +`?` | 表示匹配前面的子表达式 0 次或 1 次 +`{m}` | 表示匹配前面的子表达式 m 次 +`{m,}` | 表示匹配前面的子表达式至少 m 次 +`{m,n}` | 表示匹配前面的子表达式至少 m 次,至多 n 次 + +例如: + +- `ca*t 匹配: ct, cat, caaaat, ...` +- `ab\d|ac\d 匹配: ab1, ac9, ...` +- `([^a-q]bd) 匹配: rbd, 5bd, ...` + +## 例子 + +假设我们要匹配这样的字符串: + + +```python +string = 'hello world' +pattern = 'hello (\w+)' + +match = re.match(pattern, string) +print match +``` + + <_sre.SRE_Match object at 0x0000000003A5DA80> + + +一旦找到了符合条件的部分,我们便可以使用 `group` 方法查看匹配的部分: + + +```python +if match is not None: + print match.group(0) +``` + + hello world + + + +```python +if match is not None: + print match.group(1) +``` + + world + + +我们可以改变 string 的内容: + + +```python +string = 'hello there' +pattern = 'hello (\w+)' + +match = re.match(pattern, string) +if match is not None: + print match.group(0) + print match.group(1) +``` + + hello there + there + + +通常,`match.group(0)` 匹配整个返回的内容,之后的 `1,2,3,...` 返回规则中每个括号(按照括号的位置排序)匹配的部分。 + +如果某个 `pattern` 需要反复使用,那么我们可以将它预先编译: + + +```python +pattern1 = re.compile('hello (\w+)') + +match = pattern1.match(string) +if match is not None: + print match.group(1) +``` + + there + + +由于元字符的存在,所以对于一些特殊字符,我们需要使用 `'\'` 进行逃逸字符的处理,使用表达式 `'\\'` 来匹配 `'\'` 。 + +但事实上,`Python` 本身对逃逸字符也是这样处理的: + + +```python +pattern = '\\' +print pattern +``` + + \ + + +因为逃逸字符的问题,我们需要使用四个 `'\\\\'` 来匹配一个单独的 `'\'`: + + +```python +pattern = '\\\\' +path = "C:\\foo\\bar\\baz.txt" +print re.split(pattern, path) +``` + + ['C:', 'foo', 'bar', 'baz.txt'] + +
+这样看起来十分麻烦,好在 `Python` 提供了 `raw string` 来忽略对逃逸字符串的处理,从而可以这样进行匹配: + + +```python +pattern = r'\\' +path = r"C:\foo\bar\baz.txt" +print re.split(pattern, path) +``` + + ['C:', 'foo', 'bar', 'baz.txt'] + + +如果规则太多复杂,正则表达式不一定是个好选择。 + +## Numpy 的 fromregex() + + +```python +%%file test.dat +1312 foo +1534 bar +444 qux +``` + + Writing test.dat + + + fromregex(file, pattern, dtype) + +`dtype` 中的内容与 `pattern` 的括号一一对应: + + +```python +pattern = "(\d+)\s+(...)" +dt = [('num', 'int64'), ('key', 'S3')] + +from numpy import fromregex +output = fromregex('test.dat', pattern, dt) +print output +``` + + [(1312L, 'foo') (1534L, 'bar') (444L, 'qux')] + + +显示 `num` 项: + + +```python +print output['num'] +``` + + [1312 1534 444] + + + +```python +import os +os.remove('test.dat') +``` diff --git a/docs/05-advanced-python/05.05-datetime.md b/docs/05-advanced-python/05.05-datetime.md new file mode 100644 index 00000000..d17c1c4b --- /dev/null +++ b/docs/05-advanced-python/05.05-datetime.md @@ -0,0 +1,150 @@ + +# datetime 模块 + + +```python +import datetime as dt +``` + +`datetime` 提供了基础时间和日期的处理。 + +## date 对象 + +可以使用 `date(year, month, day)` 产生一个 `date` 对象: + + +```python +d1 = dt.date(2007, 9, 25) +d2 = dt.date(2008, 9, 25) +``` + +可以格式化 `date` 对象的输出: + + +```python +print d1 +print d1.strftime('%A, %m/%d/%y') +print d1.strftime('%a, %m-%d-%Y') +``` + + 2007-09-25 + Tuesday, 09/25/07 + Tue, 09-25-2007 + + +可以看两个日期相差多久: + + +```python +print d2 - d1 +``` + + 366 days, 0:00:00 + + +返回的是一个 `timedelta` 对象: + + +```python +d = d2 - d1 +print d.days +print d.seconds +``` + + 366 + 0 + + +查看今天的日期: + + +```python +print dt.date.today() +``` + + 2015-09-10 + + +## time 对象 + +可以使用 `time(hour, min, sec, us)` 产生一个 `time` 对象: + + +```python +t1 = dt.time(15, 38) +t2 = dt.time(18) +``` + +改变显示格式: + + +```python +print t1 +print t1.strftime('%I:%M, %p') +print t1.strftime('%H:%M:%S, %p') +``` + + 15:38:00 + 03:38, PM + 15:38:00, PM + + +因为没有具体的日期信息,所以 `time` 对象不支持减法操作。 + +## datetime 对象 + +可以使用 
`datetime(year, month, day, hr, min, sec, us)` 来创建一个 `datetime` 对象。 + +获得当前时间: + + +```python +d1 = dt.datetime.now() +print d1 +``` + + 2015-09-10 20:58:50.148000 + + +给当前的时间加上 `30` 天,`timedelta` 的参数依次是 `timedelta(days, seconds, microseconds, milliseconds, minutes, hours, weeks)`: + + +```python +d2 = d1 + dt.timedelta(30) +print d2 +``` + + 2015-10-10 20:58:50.148000 + + +除此之外,我们还可以通过一些指定格式的字符串来创建 `datetime` 对象: + + +```python +print dt.datetime.strptime('2/10/01', '%m/%d/%y') +``` + + 2001-02-10 00:00:00 + + +## datetime 格式字符表 + +字符|含义 +--|-- +`%a` | 星期英文缩写 +`%A` | 星期英文 +`%w` | 一星期的第几天,`[0(sun),6]` +`%b` | 月份英文缩写 +`%B` | 月份英文 +`%d` | 日期,`[01,31]` +`%H` | 小时,`[00,23]` +`%I` | 小时,`[01,12]` +`%j` | 一年的第几天,`[001,366]` +`%m` | 月份,`[01,12]` +`%M` | 分钟,`[00,59]` +`%p` | AM 和 PM +`%S` | 秒钟,`[00,61]` (大概是有闰秒的存在) +`%U` | 一年中的第几个星期,星期日为第一天,`[00,53]` +`%W` | 一年中的第几个星期,星期一为第一天,`[00,53]` +`%y` | 没有世纪的年份 +`%Y` | 完整的年份 diff --git a/docs/05-advanced-python/05.06-sql-databases.md b/docs/05-advanced-python/05.06-sql-databases.md new file mode 100644 index 00000000..2d5aa983 --- /dev/null +++ b/docs/05-advanced-python/05.06-sql-databases.md @@ -0,0 +1,122 @@ + +# SQL 数据库 + +`Python` 提供了一系列标准的数据库的 API,这里我们介绍 sqlite 数据库的用法,其他的数据库的用法大同小异: + + +```python +import sqlite3 as db +``` + +首先我们要建立或者连接到一个数据库上: + + +```python +connection = db.connect("my_database.sqlite") +``` + +不同的数据库有着不同的连接方法,例如 cx-oracle 数据库的链接方式为: + + connection = db.connect(username, password, host, port, 'XE') + +一旦建立连接,我们可以利用它的 `cursor()` 来执行 SQL 语句: + + +```python +cursor = connection.cursor() +cursor.execute("""CREATE TABLE IF NOT EXISTS orders( + order_id TEXT PRIMARY KEY, + date TEXT, + symbol TEXT, + quantity INTEGER, + price NUMBER)""") +cursor.execute("""INSERT INTO orders VALUES + ('A0001', '2013-12-01', 'AAPL', 1000, 203.4)""") +connection.commit() +``` + +不过为了安全起见,一般不将数据内容写入字符串再传入,而是使用这样的方式: + + +```python +orders = [ + ("A0002","2013-12-01","MSFT",1500,167.5), + ("A0003","2013-12-02","GOOG",1500,167.5) +] +cursor.executemany("""INSERT INTO orders
VALUES + (?, ?, ?, ?, ?)""", orders) +connection.commit() +``` + +cx-oracle 数据库使用不同的方式: + + cursor.executemany("""INSERT INTO orders VALUES + (:order_id, :date, :symbol, :quantity, :price)""", + orders) + +查看支持的数据库格式: + + +```python +db.paramstyle +``` + + + + + 'qmark' + + + +在 `query` 语句执行之后,我们需要进行 `commit`,否则数据库将不会接受这些变化,如果想撤销某个 `commit`,可以使用 `rollback()` 方法撤销到上一次 `commit()` 的结果: + + try: + ... # perform some operations + except: + connection.rollback() + raise + else: + connection.commit() + +使用 `SELECT` 语句对数据库进行查询: + + +```python +stock = 'MSFT' +cursor.execute("""SELECT * + FROM orders + WHERE symbol=? + ORDER BY quantity""", (stock,)) +for row in cursor: + print row +``` + + (u'A0002', u'2013-12-01', u'MSFT', 1500, 167.5) + + +`cursor.fetchone()` 返回下一条内容, `cursor.fetchall()` 返回所有查询到的内容组成的列表(可能非常大): + + +```python +stock = 'AAPL' +cursor.execute("""SELECT * + FROM orders + WHERE symbol=? + ORDER BY quantity""", (stock,)) +cursor.fetchall() +``` + + + + + [(u'A0001', u'2013-12-01', u'AAPL', 1000, 203.4)] + + + +关闭数据库: + + +```python +cursor.close() +connection.close() +``` diff --git a/docs/05-advanced-python/05.07-object-relational-mappers.md b/docs/05-advanced-python/05.07-object-relational-mappers.md new file mode 100644 index 00000000..931d3c59 --- /dev/null +++ b/docs/05-advanced-python/05.07-object-relational-mappers.md @@ -0,0 +1,130 @@ + +# 对象关系映射 + +数据库中的记录可以与一个 `Python` 对象对应。 + +例如对于上一节中的数据库: + +Order|Date|Stock|Quantity|Price +--|--|--|--|-- +A0001|2013-12-01|AAPL|1000|203.4 +A0002|2013-12-01|MSFT|1500|167.5 +A0003|2013-12-02|GOOG|1500|167.5 + +可以用一个类来描述: + +Attr.|Method +--|-- +Order id| Cost +Date| +Stock| +Quant.| +Price| + +可以使用 `sqlalchemy` 来实现这种对应: + + +```python +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy import Column, Date, Float, Integer, String + +Base = declarative_base() + +class Order(Base): + __tablename__ = 'orders' + + order_id = Column(String, primary_key=True) + date = Column(Date) + symbol = 
Column(String) + quantity = Column(Integer) + price = Column(Float) + + def get_cost(self): + return self.quantity*self.price +``` + +生成一个 `Order` 对象: + + +```python +import datetime +order = Order(order_id='A0004', date=datetime.date.today(), symbol='MSFT', quantity=-1000, price=187.54) +``` + +调用方法: + + +```python +order.get_cost() +``` + + + + + -187540.0 + + + +使用上一节生成的数据库产生一个 `session`: + + +```python +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +engine = create_engine("sqlite:///my_database.sqlite") # 相当于 connection +Session = sessionmaker(bind=engine) # 相当于 cursor +session = Session() +``` + +使用这个 `session` 向数据库中添加刚才生成的对象: + + +```python +session.add(order) +session.commit() +``` + +显示是否添加成功: + + +```python +for row in engine.execute("SELECT * FROM orders"): + print row +``` + + (u'A0001', u'2013-12-01', u'AAPL', 1000, 203.4) + (u'A0002', u'2013-12-01', u'MSFT', 1500, 167.5) + (u'A0003', u'2013-12-02', u'GOOG', 1500, 167.5) + (u'A0004', u'2015-09-10', u'MSFT', -1000, 187.54) + + +使用 `filter` 进行查询,返回的是 `Order` 对象的列表: + + +```python +for order in session.query(Order).filter(Order.symbol=="AAPL"): + print order.order_id, order.date, order.get_cost() +``` + + A0001 2013-12-01 203400.0 + + +返回列表的第一个: + + +```python +order_2 = session.query(Order).filter(Order.order_id=='A0002').first() +``` + + +```python +order_2.symbol +``` + + + + + u'MSFT' + + diff --git a/docs/05-advanced-python/05.08-functions.md b/docs/05-advanced-python/05.08-functions.md new file mode 100644 index 00000000..bfd2749c --- /dev/null +++ b/docs/05-advanced-python/05.08-functions.md @@ -0,0 +1,446 @@ + +# 函数进阶:参数传递,高阶函数,lambda 匿名函数,global 变量,递归 + +## 函数是基本类型 + +在 `Python` 中,函数是一种基本类型的对象,这意味着 + +- 可以将函数作为参数传给另一个函数 +- 将函数作为字典的值储存 +- 将函数作为另一个函数的返回值 + + +```python +def square(x): + """Square of x.""" + return x*x + +def cube(x): + """Cube of x.""" + return x*x*x +``` + +作为字典的值: + + +```python +funcs = { + 'square': square, + 'cube': cube, +} +``` + +例子: + + 
+```python +x = 2 + +print square(x) +print cube(x) + +for func in sorted(funcs): + print func, funcs[func](x) +``` + + 4 + 8 + cube 8 + square 4 + + +## 函数参数 + +### 引用传递 + +`Python` 中的函数传递方式是 `call by reference` 即引用传递,例如,对于这样的用法: + + x = [10, 11, 12] + f(x) + +传递给函数 `f` 的是一个指向 `x` 所包含内容的引用,如果我们修改了这个引用所指向内容的值(例如 `x[0]=999`),那么外面的 `x` 的值也会被改变。不过如果我们在函数中赋给 `x` 一个新的值(例如另一个列表),那么在函数外面的 `x` 的值不会改变: + + +```python +def mod_f(x): + x[0] = 999 + return x + +x = [1, 2, 3] + +print x +print mod_f(x) +print x +``` + + [1, 2, 3] + [999, 2, 3] + [999, 2, 3] + + + +```python +def no_mod_f(x): + x = [4, 5, 6] + return x + +x = [1,2,3] + +print x +print no_mod_f(x) +print x +``` + + [1, 2, 3] + [4, 5, 6] + [1, 2, 3] + + +### 默认参数是可变的! + +函数可以传递默认参数,默认参数的绑定发生在函数定义的时候,以后每次调用默认参数时都会使用同一个引用。 + +这样的机制会导致这种情况的发生: + + +```python +def f(x = []): + x.append(1) + return x +``` + +理论上说,我们希望调用 `f()` 时返回的是 `[1]`, 但事实上: + + +```python +print f() +print f() +print f() +print f(x = [9,9,9]) +print f() +print f() +``` + + [1] + [1, 1] + [1, 1, 1] + [9, 9, 9, 1] + [1, 1, 1, 1] + [1, 1, 1, 1, 1] + + +而我们希望看到的应该是这样: + + +```python +def f(x = None): + if x is None: + x = [] + x.append(1) + return x + +print f() +print f() +print f() +print f(x = [9,9,9]) +print f() +print f() +``` + + [1] + [1] + [1] + [9, 9, 9, 1] + [1] + [1] + + +## 高阶函数 + +以函数作为参数,或者返回一个函数的函数是高阶函数,常用的例子有 `map` 和 `filter` 函数: + +`map(f, sq)` 函数将 `f` 作用到 `sq` 的每个元素上去,并返回结果组成的列表,相当于: +```python +[f(s) for s in sq] +``` + + +```python +map(square, range(5)) +``` + + + + + [0, 1, 4, 9, 16] + + + +`filter(f, sq)` 函数的作用相当于,对于 `sq` 的每个元素 `s`,返回所有 `f(s)` 为 `True` 的 `s` 组成的列表,相当于: +```python +[s for s in sq if f(s)] +``` + + +```python +def is_even(x): + return x % 2 == 0 + +filter(is_even, range(5)) +``` + + + + + [0, 2, 4] + + + +一起使用: + + +```python +map(square, filter(is_even, range(5))) +``` + + + + + [0, 4, 16] + + + +`reduce(f, sq)` 函数接受一个二元操作函数 `f(x,y)`,并对于序列 `sq` 每次合并两个元素: + + +```python +def my_add(x, y): + return x + y + 
+reduce(my_add, [1,2,3,4,5]) +``` + + + + + 15 + + + +传入加法函数,相当于对序列求和。 + +返回一个函数: + + +```python +def make_logger(target): + def logger(data): + with open(target, 'a') as f: + f.write(data + '\n') + return logger + +foo_logger = make_logger('foo.txt') +foo_logger('Hello') +foo_logger('World') +``` + + +```python +!cat foo.txt +``` + + Hello + World + + + +```python +import os +os.remove('foo.txt') +``` + +## 匿名函数 + +在使用 `map`, `filter`,`reduce` 等函数的时候,为了方便,对一些简单的函数,我们通常使用匿名函数的方式进行处理,其基本形式是: + + lambda <variables>: <expression> + +例如,我们可以将这个: + + +```python +print map(square, range(5)) +``` + + [0, 1, 4, 9, 16] + + +用匿名函数替换为: + + +```python +print map(lambda x: x * x, range(5)) +``` + + [0, 1, 4, 9, 16] + + +匿名函数虽然写起来比较方便(省去了定义函数的烦恼),但是有时候会比较难于阅读: + + +```python +s1 = reduce(lambda x, y: x+y, map(lambda x: x**2, range(1,10))) +print(s1) +``` + + 285 + + +当然,更简单地,我们可以写成这样: + + +```python +s2 = sum(x**2 for x in range(1, 10)) +print s2 +``` + + 285 + + +## global 变量 + +一般来说,函数中是可以直接使用全局变量的值的: + + +```python +x = 15 + +def print_x(): + print x + +print_x() +``` + + 15 + + +但是要在函数中修改全局变量的值,需要加上 `global` 关键字: + + +```python +x = 15 + +def print_newx(): + global x + x = 18 + print x + +print_newx() + +print x +``` + + 18 + 18 + + +如果不加上这句 `global` 那么全局变量的值不会改变: + + +```python +x = 15 + +def print_newx(): + x = 18 + print x + +print_newx() + +print x +``` + + 18 + 15 + + +## 递归 + +递归是指函数在执行的过程中调用了本身,一般用于分治法,不过在 `Python` 中这样的用法十分地小,所以一般不怎么使用: + +Fibonacci 数列: + + +```python +def fib1(n): + """Fib with recursion.""" + + # base case + if n==0 or n==1: + return 1 + # recursive case + else: + return fib1(n-1) + fib1(n-2) + +print [fib1(i) for i in range(10)] +``` + + [1, 1, 2, 3, 5, 8, 13, 21, 34, 55] + + +一个更高效的非递归版本: + + +```python +def fib2(n): + """Fib without recursion.""" + a, b = 0, 1 + for i in range(1, n+1): + a, b = b, a+b + return b + +print [fib2(i) for i in range(10)] +``` + + [1, 1, 2, 3, 5, 8, 13, 21, 34, 55] + + +速度比较: + + +```python +%timeit fib1(20) +%timeit fib2(20) +``` + + 100
loops, best of 3: 5.35 ms per loop + 100000 loops, best of 3: 2.2 µs per loop + + +对于第一个递归函数来说,调用 `fib(n+2)` 的时候计算 `fib(n+1), fib(n)`,调用 `fib(n+1)` 的时候也计算了一次 `fib(n)`,这样造成了重复计算。 + +使用缓存机制的递归版本,这里利用了默认参数可变的性质,构造了一个缓存: + + +```python +def fib3(n, cache={0: 1, 1: 1}): + """Fib with recursion and caching.""" + + try: + return cache[n] + except KeyError: + cache[n] = fib3(n-1) + fib3(n-2) + return cache[n] + +print [fib3(i) for i in range(10)] + +%timeit fib1(20) +%timeit fib2(20) +%timeit fib3(20) +``` + + [1, 1, 2, 3, 5, 8, 13, 21, 34, 55] + 100 loops, best of 3: 5.37 ms per loop + 100000 loops, best of 3: 2.19 µs per loop + The slowest run took 150.16 times longer than the fastest. This could mean that an intermediate result is being cached + 1000000 loops, best of 3: 230 ns per loop + diff --git a/docs/05-advanced-python/05.09-iterators.md b/docs/05-advanced-python/05.09-iterators.md new file mode 100644 index 00000000..2a55a2b6 --- /dev/null +++ b/docs/05-advanced-python/05.09-iterators.md @@ -0,0 +1,311 @@ + +# 迭代器 + +## 简介 + +迭代器对象可以在 `for` 循环中使用: + + +```python +x = [2, 4, 6] + +for n in x: + print n +``` + + 2 + 4 + 6 + + +其好处是不需要对下标进行迭代,但是有些情况下,我们既希望获得下标,也希望获得对应的值,那么可以将迭代器传给 `enumerate` 函数,这样每次迭代都会返回一组 `(index, value)` 组成的元组: + + +```python +x = [2, 4, 6] + +for i, n in enumerate(x): + print 'pos', i, 'is', n +``` + + pos 0 is 2 + pos 1 is 4 + pos 2 is 6 + + +迭代器对象必须实现 `__iter__` 方法: + + +```python +x = [2, 4, 6] +i = x.__iter__() +print i +``` + + + + +`__iter__()` 返回的对象支持 `next` 方法,返回迭代器中的下一个元素: + + +```python +print i.next() +``` + + 2 + + +当下一个元素不存在时,会 `raise` 一个 `StopIteration` 错误: + + +```python +print i.next() +print i.next() +``` + + 4 + 6 + + + +```python +i.next() +``` + + + --------------------------------------------------------------------------- + + StopIteration Traceback (most recent call last) + + in () + ----> 1 i.next() + + + StopIteration: + + +很多标准库函数返回的是迭代器: + + +```python +r = reversed(x) +print r +``` + + + + +调用它的 `next()` 方法: + + 
+```python +print r.next() +print r.next() +print r.next() +``` + + 6 + 4 + 2 + + +字典对象的 `iterkeys, itervalues, iteritems` 方法返回的都是迭代器: + + +```python +x = {'a':1, 'b':2, 'c':3} +i = x.iteritems() +print i +``` + + + + +迭代器的 `__iter__` 方法返回它本身: + + +```python +print i.__iter__() +``` + + + + + +```python +print i.next() +``` + + ('a', 1) + + +## 自定义迭代器 + +自定义一个 list 的取反迭代器: + + +```python +class ReverseListIterator(object): + + def __init__(self, list): + self.list = list + self.index = len(list) + + def __iter__(self): + return self + + def next(self): + self.index -= 1 + if self.index >= 0: + return self.list[self.index] + else: + raise StopIteration +``` + + +```python +x = range(10) +for i in ReverseListIterator(x): + print i, +``` + + 9 8 7 6 5 4 3 2 1 0 + + +只要我们定义了这三个方法,我们可以返回任意迭代值: + + +```python +class Collatz(object): + + def __init__(self, start): + self.value = start + + def __iter__(self): + return self + + def next(self): + if self.value == 1: + raise StopIteration + elif self.value % 2 == 0: + self.value = self.value / 2 + else: + self.value = 3 * self.value + 1 + return self.value +``` + +这里我们实现 [Collatz 猜想](http://baike.baidu.com/view/736196.htm): + +- 奇数 n:返回 3n + 1 +- 偶数 n:返回 n / 2 + +直到 n 为 1 为止: + + +```python +for x in Collatz(7): + print x, +``` + + 22 11 34 17 52 26 13 40 20 10 5 16 8 4 2 1 + + +不过迭代器对象存在状态,会出现这样的问题: + + +```python +i = Collatz(7) +for x, y in zip(i, i): + print x, y +``` + + 22 11 + 34 17 + 52 26 + 13 40 + 20 10 + 5 16 + 8 4 + 2 1 + + +一个比较好的解决方法是将迭代器和可迭代对象分开处理,这里提供了一个二分树的中序遍历实现: + + +```python +class BinaryTree(object): + def __init__(self, value, left=None, right=None): + self.value = value + self.left = left + self.right = right + + def __iter__(self): + return InorderIterator(self) +``` + + +```python +class InorderIterator(object): + + def __init__(self, node): + self.node = node + self.stack = [] + + def next(self): + if len(self.stack) > 0 or self.node is not None: + while self.node is not None: + 
self.stack.append(self.node) + self.node = self.node.left + node = self.stack.pop() + self.node = node.right + return node.value + else: + raise StopIteration() +``` + + +```python +tree = BinaryTree( + left=BinaryTree( + left=BinaryTree(1), + value=2, + right=BinaryTree( + left=BinaryTree(3), + value=4, + right=BinaryTree(5) + ), + ), + value=6, + right=BinaryTree( + value=7, + right=BinaryTree(8) + ) +) +``` + + +```python +for value in tree: + print value, +``` + + 1 2 3 4 5 6 7 8 + + +不会出现之前的问题: + + +```python +for x,y in zip(tree, tree): + print x, y +``` + + 1 1 + 2 2 + 3 3 + 4 4 + 5 5 + 6 6 + 7 7 + 8 8 + diff --git a/docs/05-advanced-python/05.10-generators.md b/docs/05-advanced-python/05.10-generators.md new file mode 100644 index 00000000..a53ff9b8 --- /dev/null +++ b/docs/05-advanced-python/05.10-generators.md @@ -0,0 +1,216 @@ + +# 生成器 + +`while` 循环通常有这样的形式: + +```python + +result = [] +while True: + + result.append(value) + if : + break +``` + +使用迭代器实现这样的循环: + +```python +class GenericIterator(object): + def __init__(self, ...): + + # 需要额外储存状态 + + def next(self): + + + if : + raise StopIteration() + + return value +``` + +更简单的,可以使用生成器: + +```python +def generator(...): + + while True: + + # yield 说明这个函数可以返回多个值! 
+ yield value + if : + break +``` + +生成器使用 `yield` 关键字将值输出,而迭代器则通过 `next` 的 `return` 将值返回;与迭代器不同的是,生成器会自动记录当前的状态,而迭代器则需要进行额外的操作来记录当前的状态。 + +对于之前的 `collatz` 猜想,简单循环的实现如下: + + +```python +def collatz(n): + sequence = [] + while n != 1: + if n % 2 == 0: + n /= 2 + else: + n = 3*n + 1 + sequence.append(n) + return sequence + +for x in collatz(7): + print x, +``` + + 22 11 34 17 52 26 13 40 20 10 5 16 8 4 2 1 + + +迭代器的版本如下: + + +```python +class Collatz(object): + def __init__(self, start): + self.value = start + + def __iter__(self): + return self + + def next(self): + if self.value == 1: + raise StopIteration() + elif self.value % 2 == 0: + self.value = self.value/2 + else: + self.value = 3*self.value + 1 + return self.value + +for x in Collatz(7): + print x, +``` + + 22 11 34 17 52 26 13 40 20 10 5 16 8 4 2 1 + + +生成器的版本如下: + + +```python +def collatz(n): + while n != 1: + if n % 2 == 0: + n /= 2 + else: + n = 3*n + 1 + yield n + +for x in collatz(7): + print x, +``` + + 22 11 34 17 52 26 13 40 20 10 5 16 8 4 2 1 + + +事实上,生成器也是一种迭代器: + + +```python +x = collatz(7) +print x +``` + + + + +它支持 `next` 方法,返回下一个 `yield` 的值: + + +```python +print x.next() +print x.next() +``` + + 22 + 11 + + +`__iter__` 方法返回的是它本身: + + +```python +print x.__iter__() +``` + + + + +之前的二叉树迭代器可以改写为更简单的生成器模式来进行中序遍历: + + +```python +class BinaryTree(object): + def __init__(self, value, left=None, right=None): + self.value = value + self.left = left + self.right = right + + def __iter__(self): + # 将迭代器设为生成器方法 + return self.inorder() + + def inorder(self): + # traverse the left branch + if self.left is not None: + for value in self.left: + yield value + + # yield node's value + yield self.value + + # traverse the right branch + if self.right is not None: + for value in self.right: + yield value +``` + +非递归的实现: + + +```python +def inorder(self): + node = self + stack = [] + while len(stack) > 0 or node is not None: + while node is not None: + stack.append(node) + node = node.left + node = stack.pop() 
+ yield node.value + node = node.right +``` + + +```python +tree = BinaryTree( + left=BinaryTree( + left=BinaryTree(1), + value=2, + right=BinaryTree( + left=BinaryTree(3), + value=4, + right=BinaryTree(5) + ), + ), + value=6, + right=BinaryTree( + value=7, + right=BinaryTree(8) + ) +) +for value in tree: + print value, +``` + + 1 2 3 4 5 6 7 8 + diff --git a/docs/05-advanced-python/05.11-context-managers-and-the-with-statement.md b/docs/05-advanced-python/05.11-context-managers-and-the-with-statement.md new file mode 100644 index 00000000..a94b2b83 --- /dev/null +++ b/docs/05-advanced-python/05.11-context-managers-and-the-with-statement.md @@ -0,0 +1,451 @@ + +# with 语句和上下文管理器 + +```python +# create/aquire some resource +... +try: + # do something with the resource + ... +finally: + # destroy/release the resource + ... +``` + +处理文件,线程,数据库,网络编程等等资源的时候,我们经常需要使用上面这样的代码形式,以确保资源的正常使用和释放。 + +好在`Python` 提供了 `with` 语句帮我们自动进行这样的处理,例如之前在打开文件时我们使用: + + +```python +with open('my_file', 'w') as fp: + # do stuff with fp + data = fp.write("Hello world") +``` + +这等效于下面的代码,但是要更简便: + + +```python +fp = open('my_file', 'w') +try: + # do stuff with f + data = fp.write("Hello world") +finally: + fp.close() +``` + +## 上下文管理器 + +其基本用法如下: +``` +with : + +``` + +`` 执行的结果应当返回一个实现了上下文管理器的对象,即实现这样两个方法,`__enter__` 和 `__exit__`: + + +```python +print fp.__enter__ +print fp.__exit__ +``` + + + + + +`__enter__` 方法在 `` 执行前执行,而 `__exit__` 在 `` 执行结束后执行: + +比如可以这样定义一个简单的上下文管理器: + + +```python +class ContextManager(object): + + def __enter__(self): + print "Entering" + + def __exit__(self, exc_type, exc_value, traceback): + print "Exiting" +``` + +使用 `with` 语句执行: + + +```python +with ContextManager(): + print " Inside the with statement" +``` + + Entering + Inside the with statement + Exiting + + +即使 `` 中执行的内容出错,`__exit__` 也会被执行: + + +```python +with ContextManager(): + print 1/0 +``` + + Entering + Exiting + + + + --------------------------------------------------------------------------- + + 
ZeroDivisionError Traceback (most recent call last) + + in () + 1 with ContextManager(): + ----> 2 print 1/0 + + + ZeroDivisionError: integer division or modulo by zero + + +## `__`enter`__` 的返回值 + +如果在 `__enter__` 方法下添加了返回值,那么我们可以使用 `as` 把这个返回值传给某个参数: + + +```python +class ContextManager(object): + + def __enter__(self): + print "Entering" + return "my value" + + def __exit__(self, exc_type, exc_value, traceback): + print "Exiting" +``` + +将 `__enter__` 返回的值传给 `value` 变量: + + +```python +with ContextManager() as value: + print value +``` + + Entering + my value + Exiting + + +一个通常的做法是将 `__enter__` 的返回值设为这个上下文管理器对象本身,文件对象就是这样做的: + + +```python +fp = open('my_file', 'r') +print fp.__enter__() +fp.close() +``` + + + + + +```python +import os +os.remove('my_file') +``` + +实现方法非常简单: + + +```python +class ContextManager(object): + + def __enter__(self): + print "Entering" + return self + + def __exit__(self, exc_type, exc_value, traceback): + print "Exiting" +``` + + +```python +with ContextManager() as value: + print value +``` + + Entering + <__main__.ContextManager object at 0x0000000003D48828> + Exiting + + +## 错误处理 + +上下文管理器对象将错误处理交给 `__exit__` 进行,可以将错误类型,错误值和 `traceback` 等内容作为参数传递给 `__exit__` 函数: + + +```python +class ContextManager(object): + + def __enter__(self): + print "Entering" + + def __exit__(self, exc_type, exc_value, traceback): + print "Exiting" + if exc_type is not None: + print " Exception:", exc_value +``` + +如果没有错误,这些值都将是 `None`, 当有错误发生的时候: + + +```python +with ContextManager(): + print 1/0 +``` + + Entering + Exiting + Exception: integer division or modulo by zero + + + + --------------------------------------------------------------------------- + + ZeroDivisionError Traceback (most recent call last) + + in () + 1 with ContextManager(): + ----> 2 print 1/0 + + + ZeroDivisionError: integer division or modulo by zero + + +在这个例子中,我们只是简单的显示了错误的值,并没有对错误进行处理,所以错误被向上抛出了,如果不想让错误抛出,只需要将 `__exit__` 的返回值设为 `True`: + + +```python +class 
ContextManager(object): + + def __enter__(self): + print "Entering" + + def __exit__(self, exc_type, exc_value, traceback): + print "Exiting" + if exc_type is not None: + print " Exception suppresed:", exc_value + return True +``` + + +```python +with ContextManager(): + print 1/0 +``` + + Entering + Exiting + Exception suppresed: integer division or modulo by zero + + +在这种情况下,错误就不会被向上抛出。 + +## 数据库的例子 + +对于数据库的 transaction 来说,如果没有错误,我们就将其 `commit` 进行保存,如果有错误,那么我们将其回滚到上一次成功的状态。 + + +```python +class Transaction(object): + + def __init__(self, connection): + self.connection = connection + + def __enter__(self): + return self.connection.cursor() + + def __exit__(self, exc_type, exc_value, traceback): + if exc_value is None: + # transaction was OK, so commit + self.connection.commit() + else: + # transaction had a problem, so rollback + self.connection.rollback() +``` + +建立一个数据库,保存一个地址表: + + +```python +import sqlite3 as db +connection = db.connect(":memory:") + +with Transaction(connection) as cursor: + cursor.execute("""CREATE TABLE IF NOT EXISTS addresses ( + address_id INTEGER PRIMARY KEY, + street_address TEXT, + city TEXT, + state TEXT, + country TEXT, + postal_code TEXT + )""") +``` + +插入数据: + + +```python +with Transaction(connection) as cursor: + cursor.executemany("""INSERT OR REPLACE INTO addresses VALUES (?, ?, ?, ?, ?, ?)""", [ + (0, '515 Congress Ave', 'Austin', 'Texas', 'USA', '78701'), + (1, '245 Park Avenue', 'New York', 'New York', 'USA', '10167'), + (2, '21 J.J. 
Thompson Ave.', 'Cambridge', None, 'UK', 'CB3 0FA'), + (3, 'Supreme Business Park', 'Hiranandani Gardens, Powai, Mumbai', 'Maharashtra', 'India', '400076'), + ]) +``` + +假设插入数据之后出现了问题: + + +```python +with Transaction(connection) as cursor: + cursor.execute("""INSERT OR REPLACE INTO addresses VALUES (?, ?, ?, ?, ?, ?)""", + (4, '2100 Pennsylvania Ave', 'Washington', 'DC', 'USA', '78701'), + ) + raise Exception("out of addresses") +``` + + + --------------------------------------------------------------------------- + + Exception Traceback (most recent call last) + + in () + 3 (4, '2100 Pennsylvania Ave', 'Washington', 'DC', 'USA', '78701'), + 4 ) + ----> 5 raise Exception("out of addresses") + + + Exception: out of addresses + + +那么最新的一次插入将不会被保存,而是返回上一次 `commit` 成功的状态: + + +```python +cursor.execute("SELECT * FROM addresses") +for row in cursor: + print row +``` + + (0, u'515 Congress Ave', u'Austin', u'Texas', u'USA', u'78701') + (1, u'245 Park Avenue', u'New York', u'New York', u'USA', u'10167') + (2, u'21 J.J. 
Thompson Ave.', u'Cambridge', None, u'UK', u'CB3 0FA') + (3, u'Supreme Business Park', u'Hiranandani Gardens, Powai, Mumbai', u'Maharashtra', u'India', u'400076') + + +## contextlib 模块 + +很多的上下文管理器有很多相似的地方,为了防止写入很多重复的模式,可以使用 `contextlib` 模块来进行处理。 + +最简单的处理方式是使用 `closing` 函数确保对象的 `close()` 方法始终被调用: + + +```python +from contextlib import closing +import urllib + +with closing(urllib.urlopen('http://www.baidu.com')) as url: + html = url.read() + +print html[:100] +``` + + + + +查看函数拥有的方法: + + +```python +dir(foo) +``` + + + + + ['__call__', + '__class__', + '__closure__', + '__code__', + '__defaults__', + '__delattr__', + '__dict__', + '__doc__', + '__format__', + '__get__', + '__getattribute__', + '__globals__', + '__hash__', + '__init__', + '__module__', + '__name__', + '__new__', + '__reduce__', + '__reduce_ex__', + '__repr__', + '__setattr__', + '__sizeof__', + '__str__', + '__subclasshook__', + 'func_closure', + 'func_code', + 'func_defaults', + 'func_dict', + 'func_doc', + 'func_globals', + 'func_name'] + + + +在这些方法中,`__call__` 是最重要的一种方法: + + +```python +foo.__call__(42) +``` + + 42 + + +相当于: + + +```python +foo(42) +``` + + 42 + + +因为函数是对象,所以函数可以作为参数传入另一个函数: + + +```python +def bar(f, x): + x += 1 + f(x) +``` + + +```python +bar(foo, 4) +``` + + 5 + + +## 修饰符 + +修饰符是这样的一种函数,它接受一个函数作为输入,通常输出也是一个函数: + + +```python +def dec(f): + print 'I am decorating function', id(f) + return f +``` + +将 `len` 函数作为参数传入这个修饰符函数: + + +```python +declen = dec(len) +``` + + I am decorating function 33716168 + + +使用这个新生成的函数: + + +```python +declen([10,20,30]) +``` + + + + + 3 + + + +上面的例子中,我们仅仅返回了函数的本身,也可以利用这个函数生成一个新的函数,看一个新的例子: + + +```python +def loud(f): + def new_func(*args, **kw): + print 'calling with', args, kw + rtn = f(*args, **kw) + print 'return value is', rtn + return rtn + return new_func +``` + + +```python +loudlen = loud(len) +``` + + +```python +loudlen([10, 20, 30]) +``` + + calling with ([10, 20, 30],) {} + return value is 3 + + + + + + 3 + + + +## 用 @ 来使用修饰符 + 
+`Python` 使用 `@` 符号来将某个函数替换为修饰符之后的函数: + +例如这个函数: + + +```python +def foo(x): + print x + +foo = dec(foo) +``` + + I am decorating function 64021672 + + +可以替换为: + + +```python +@dec +def foo(x): + print x +``` + + I am decorating function 64021112 + + +事实上,如果修饰符返回的是一个函数,那么可以链式的使用修饰符: + +```python +@dec1 +@dec2 +def foo(x): + print x +``` + +使用修饰符 `loud` 来定义这个函数: + + +```python +@loud +def foo(x): + print x +``` + + +```python +foo(42) +``` + + calling with (42,) {} + 42 + return value is None + + +## 例子 + +定义两个修饰器函数,一个将原来的函数值加一,另一个乘二: + + +```python +def plus_one(f): + def new_func(x): + return f(x) + 1 + return new_func + +def times_two(f): + def new_func(x): + return f(x) * 2 + return new_func +``` + +定义函数,先乘二再加一: + + +```python +@plus_one +@times_two +def foo(x): + return int(x) +``` + + +```python +foo(13) +``` + + + + + 27 + + + +## 修饰器工厂 + +`decorators factories` 是返回修饰器的函数,例如: + + +```python +def super_dec(x, y, z): + def dec(f): + def new_func(*args, **kw): + print x + y + z + return f(*args, **kw) + return new_func + return dec +``` + +它的作用在于产生一个可以接受参数的修饰器,例如我们想将 `loud` 输出的内容写入一个文件去,可以这样做: + + +```python +def super_loud(filename): + fp = open(filename, 'w') + def loud(f): + def new_func(*args, **kw): + fp.write('calling with' + str(args) + str(kw)) + # 确保内容被写入 + fp.flush() + fp.close() + rtn = f(*args, **kw) + return rtn + return new_func + return loud +``` + +可以这样使用这个修饰器工厂: + + +```python +@super_loud('test.txt') +def foo(x): + print x +``` + +调用 `foo` 就会在文件中写入内容: + + +```python +foo(12) +``` + + 12 + + +查看文件内容: + + +```python +with open('test.txt') as fp: + print fp.read() +``` + + calling with(12,){} + + + +```python +import os +os.remove('test.txt') +``` diff --git a/docs/05-advanced-python/05.13-decorator-usage.md b/docs/05-advanced-python/05.13-decorator-usage.md new file mode 100644 index 00000000..f4849555 --- /dev/null +++ b/docs/05-advanced-python/05.13-decorator-usage.md @@ -0,0 +1,263 @@ + +# 修饰符的使用 + +## @classmethod 修饰符 + +在 `Python` 
标准库中,有很多自带的修饰符,例如 `classmethod` 将一个对象方法转换为类方法: + + +```python +class Foo(object): + @classmethod + def bar(cls, x): + print 'the input is', x + + def __init__(self): + pass + +``` + +类方法可以通过 `类名.方法` 来调用: + + +```python +Foo.bar(12) +``` + + the input is 12 + + +## @property 修饰符 + +有时候,我们希望像 __Java__ 一样支持 `getters` 和 `setters` 的方法,这时候就可以使用 `property` 修饰符: + + +```python +class Foo(object): + def __init__(self, data): + self.data = data + + @property + def x(self): + return self.data +``` + +此时可以使用 `.x` 这个属性查看数据(不需要加上括号): + + +```python +foo = Foo(23) +foo.x +``` + + + + + 23 + + + +这样做的好处在于,这个属性是只读的: + + +```python +foo.x = 1 +``` + + + --------------------------------------------------------------------------- + + AttributeError Traceback (most recent call last) + + in () + ----> 1 foo.x = 1 + + + AttributeError: can't set attribute + + +如果想让它变成可读写,可以加上一个修饰符 `@x.setter`: + + +```python +class Foo(object): + def __init__(self, data): + self.data = data + + @property + def x(self): + return self.data + + @x.setter + def x(self, value): + self.data = value +``` + + +```python +foo = Foo(23) +print foo.x +``` + + 23 + + +可以通过属性改变它的值: + + +```python +foo.x = 1 +print foo.x +``` + + 1 + + +## Numpy 的 @vectorize 修饰符 + +`numpy` 的 `vectorize` 函数将一个函数转换为 `ufunc`,事实上它也是一个修饰符: + + +```python +from numpy import vectorize, arange + +@vectorize +def f(x): + if x <= 0: + return x + else: + return 0 + +f(arange(-10.0,10.0)) +``` + + + + + array([-10., -9., -8., -7., -6., -5., -4., -3., -2., -1., 0., + 0., 0., 0., 0., 0., 0., 0., 0., 0.]) + + + +## 注册一个函数 + +来看这样的一个例子,定义一个类: + + +```python +class Registry(object): + def __init__(self): + self._data = {} + def register(self, f, name=None): + if name == None: + name = f.__name__ + self._data[name] = f + setattr(self, name, f) +``` + +`register` 方法接受一个函数,将这个函数名作为属性注册到对象中。 + +产生该类的一个对象: + + +```python +registry = Registry() +``` + +使用该对象的 `register` 方法作为修饰符: + + +```python +@registry.register +def greeting(): + print "hello world" +``` 
+ +这样这个函数就被注册到 `registry` 这个对象中去了: + + +```python +registry._data +``` + + + + + {'greeting': } + + + + +```python +registry.greeting +``` + + + + + + + + +[flask](http://flask.pocoo.org),一个常用的网络应用框架,处理 url 的机制跟这个类似。 + +## 使用 @wraps + +一个通常的问题在于: + + +```python +def logging_call(f): + def wrapper(*a, **kw): + print 'calling {}'.format(f.__name__) + return f(*a, **kw) + return wrapper + +@logging_call +def square(x): + ''' + square function. + ''' + return x ** 2 + +print square.__doc__, square.__name__ +``` + + None wrapper + + +我们使用修饰符之后,`square` 的 `metadata` 完全丢失了,返回的函数名与函数的 `docstring` 都不对。 + +一个解决的方法是从 `functools` 模块导入 `wraps` 修饰符来修饰我们的修饰符: + + +```python +import functools + +def logging_call(f): + @functools.wraps(f) + def wrapper(*a, **kw): + print 'calling {}'.format(f.__name__) + return f(*a, **kw) + return wrapper + +@logging_call +def square(x): + ''' + square function. + ''' + return x ** 2 + +print square.__doc__, square.__name__ +``` + + + square function. + square + + +现在这个问题解决了,所以在自定义修饰符方法的时候为了避免出现不必要的麻烦,尽量使用 `wraps` 来修饰修饰符! 
+ +## Class 修饰符 + +与函数修饰符类似,类修饰符是这样一类函数,接受一个类作为参数,通常返回一个新的类。 diff --git a/docs/05-advanced-python/05.14-the-operator-functools-itertools-toolz-fn-funcy-module.md b/docs/05-advanced-python/05.14-the-operator-functools-itertools-toolz-fn-funcy-module.md new file mode 100644 index 00000000..5ff59801 --- /dev/null +++ b/docs/05-advanced-python/05.14-the-operator-functools-itertools-toolz-fn-funcy-module.md @@ -0,0 +1,140 @@ + +# operator, functools, itertools, toolz, fn, funcy 模块 + +## operator 模块 + + +```python +import operator as op +``` + +`operator` 模块提供了各种操作符(`+,*,[]`)的函数版本方便使用: + +加法: + + +```python +print reduce(op.add, range(10)) +``` + + 45 + + +乘法: + + +```python +print reduce(op.mul, range(1,10)) +``` + + 362880 + + +`[]`: + + +```python +my_list = [('a', 1), ('bb', 4), ('ccc', 2), ('dddd', 3)] + +# 标准排序 +print sorted(my_list) + +# 使用元素的第二个元素排序 +print sorted(my_list, key=op.itemgetter(1)) + +# 使用第一个元素的长度进行排序: +print sorted(my_list, key=lambda x: len(x[0])) +``` + + [('a', 1), ('bb', 4), ('ccc', 2), ('dddd', 3)] + [('a', 1), ('ccc', 2), ('dddd', 3), ('bb', 4)] + [('a', 1), ('bb', 4), ('ccc', 2), ('dddd', 3)] + + +## functools 模块 + +`functools` 包含很多跟函数相关的工具,比如之前看到的 `wraps` 函数,不过最常用的是 `partial` 函数,这个函数允许我们使用一个函数中生成一个新函数,这个函数使用原来的函数,不过某些参数被指定了: + + +```python +from functools import partial + +# 将 reduce 的第一个参数指定为加法,得到的是类似求和的函数 +sum_ = partial(reduce, op.add) + +# 将 reduce 的第一个参数指定为乘法,得到的是类似求连乘的函数 +prod_ = partial(reduce, op.mul) + +print sum_([1,2,3,4]) +print prod_([1,2,3,4]) +``` + + 10 + 24 + + +`partial` 函数还可以按照键值对传入固定参数。 + +## itertools 模块 + +`itertools` 包含很多与迭代器对象相关的工具,其中比较常用的是排列组合生成器 `permutations` 和 `combinations`,还有在数据分析中常用的 `groupby` 生成器: + + +```python +from itertools import cycle, groupby, islice, permutations, combinations +``` + +`cycle` 返回一个无限的迭代器,按照顺序重复输出输入迭代器中的内容,`islice` 则返回一个迭代器中的一段内容: + + +```python +print list(islice(cycle('abcd'), 0, 10)) +``` + + ['a', 'b', 'c', 'd', 'a', 'b', 'c', 'd', 'a', 'b'] + + +`groupby` 返回一个字典,按照指定的 `key` 
对一组数据进行分组,字典的键是 `key`,值是一个迭代器: + + +```python +animals = sorted(['pig', 'cow', 'giraffe', 'elephant', + 'dog', 'cat', 'hippo', 'lion', 'tiger'], key=len) + +# 按照长度进行分组 +for k, g in groupby(animals, key=len): + print k, list(g) +print +``` + + 3 ['pig', 'cow', 'dog', 'cat'] + 4 ['lion'] + 5 ['hippo', 'tiger'] + 7 ['giraffe'] + 8 ['elephant'] + + + +排列: + + +```python +print [''.join(p) for p in permutations('abc')] +``` + + ['abc', 'acb', 'bac', 'bca', 'cab', 'cba'] + + +组合: + + +```python +print [list(c) for c in combinations([1,2,3,4], r=2)] +``` + + [[1, 2], [1, 3], [1, 4], [2, 3], [2, 4], [3, 4]] + + +## toolz, fn 和 funcy 模块 + +这三个模块的作用是方便我们在编程的时候使用函数式编程的风格。 diff --git a/docs/05-advanced-python/05.15-scope.md b/docs/05-advanced-python/05.15-scope.md new file mode 100644 index 00000000..54b71ed8 --- /dev/null +++ b/docs/05-advanced-python/05.15-scope.md @@ -0,0 +1,251 @@ + +# 作用域 + +在函数中,`Python` 从命名空间中寻找变量的顺序如下: + +- `local function scope` +- `enclosing scope` +- `global scope` +- `builtin scope` + +例子: + +# local 作用域 + + +```python +def foo(a,b): + c = 1 + d = a + b + c +``` + +这里所有的变量都在 `local` 作用域。 + +## global 作用域 + + +```python +c = 1 +def foo(a,b): + d = a + b + c +``` + +这里的 `c` 就在 `global` 作用域。 + +## global 关键词 + +使用 `global` 关键词可以在 `local` 作用域中修改 `global` 作用域的值。 + + +```python +c = 1 +def foo(): + global c + c = 2 + +print c +foo() +print c +``` + + 1 + 2 + + +其作用是将 `c` 指向 `global` 中的 `c`。 + +如果不加关键词,那么 `local` 作用域的 `c` 不会影响 `global` 作用域中的值: + + +```python +c = 1 +def foo(): + c = 2 + +print c +foo() +print c +``` + + 1 + 1 + + +## built-in 作用域 + + +```python +def list_length(a): + return len(a) + +a = [1,2,3] +print list_length(a) +``` + + 3 + + +这里函数 `len` 就是在 `built-in` 作用域中: + + +```python +import __builtin__ + +__builtin__.len +``` + + + + + + + + +## class 中的作用域 + +Global | MyClass +---|--- +`var = 0`
`MyClass`
`access_class` | `var = 1`
`access_class` + + +```python +# global +var = 0 + +class MyClass(object): + # class variable + var = 1 + + def access_class_c(self): + print 'class var:', self.var + + def write_class_c(self): + MyClass.var = 2 + print 'class var:', self.var + + def access_global_c(self): + print 'global var:', var + + def write_instance_c(self): + self.var = 3 + print 'instance var:', self.var +``` + +Global | MyClass | obj +---|---|---- +`var = 0`
`MyClass`
[`access_class`]
`obj` | `var = 1`
`access_class` | + + +```python +obj = MyClass() +``` + +查询 `self.var` 时,由于 `obj` 不存在 `var`,所以跳到 MyClass 中: + +Global | MyClass | obj +---|---|---- +`var = 0`
`MyClass`
[`access_class`
`self`]
`obj` | `var = 1`
`access_class` | + + +```python +obj.access_class_c() +``` + + class var: 1 + + +查询 `var` 直接跳到 `global` 作用域: + +Global | MyClass | obj +---|---|---- +`var = 0`
`MyClass`
[`access_class`
`self`]
`obj` | `var = 1`
`access_class` | + + +```python +obj.access_global_c() +``` + + global var: 0 + + +修改类中的 `MyClass.var`: + +Global | MyClass | obj +---|---|---- +`var = 0`
`MyClass`
[`access_class`
`self`]
`obj` | `var = 2`
`access_class` | + + +```python +obj.write_class_c() +``` + + class var: 2 + + +修改实例中的 `var` 时,会直接在 `obj` 域中创建一个: + +Global | MyClass | obj +---|---|---- +`var = 0`
`MyClass`
[`access_class`
`self`]
`obj` | `var = 2`
`access_class` | `var = 3` + + +```python +obj.write_instance_c() +``` + + instance var: 3 + + + +```python +MyClass.var +``` + + + + + 2 + + + +`MyClass` 中的 `var` 并没有改变。 + +## 词法作用域 + +对于嵌套函数: + + +```python +def outer(): + a = 1 + def inner(): + print "a =", a + inner() + +outer() +``` + + a = 1 + + +如果里面的函数没有找到变量,那么会向外一层寻找变量,如果再找不到,则到 `global` 作用域。 + +返回的是函数的情况: + + +```python +def outer(): + a = 1 + def inner(): + return a + return inner + +func = outer() + +print 'a (1):', func() +``` + + a (1): 1 + + +func() 函数中调用的 `a` 要从它定义的地方开始寻找,而不是在 `func` 所在的作用域寻找。 diff --git a/docs/05-advanced-python/05.16-dynamic-code-execution.md b/docs/05-advanced-python/05.16-dynamic-code-execution.md new file mode 100644 index 00000000..858f0284 --- /dev/null +++ b/docs/05-advanced-python/05.16-dynamic-code-execution.md @@ -0,0 +1,195 @@ + +# 动态编译 + +## 标准编程语言 + +对于 **C** 语言,代码一般要先编译,再执行。 + + .c -> .exe + +## 解释器语言 + +shell 脚本 + + .sh -> interpreter + +## Byte Code 编译 + +**Python, Java** 等语言先将代码编译为 byte code(不是机器码),然后再处理: + + .py -> .pyc -> interpreter + +## eval 函数 + + eval(statement, glob, local) + +使用 `eval` 函数动态执行代码,返回执行的值: + + +```python +a = 1 + +eval("a+1") +``` + + + + + 2 + + + +可以接收明明空间参数: + + +```python +local = dict(a=2) +glob = {} +eval("a+1", glob, local) +``` + + + + + 3 + + + +这里 `local` 中的 `a` 先被找到。 + +## exec 函数 + + exec(statement, glob, local) + +使用 `exec` 可以添加修改原有的变量。 + + +```python +a = 1 + +exec("b = a+1") + +print b +``` + + 2 + + + +```python +local = dict(a=2) +glob = {} +exec("b = a+1", glob, local) + +print local +``` + + {'a': 2, 'b': 3} + + +执行之后,`b` 在 `local` 命名空间中。 + +## 警告 + +动态执行的时候要注意,不要执行不信任的用户输入,因为它们拥有 `Python` 的全部权限。 + +## compile 函数生成 byte code + + compile(str, filename, mode) + + +```python +a = 1 +c = compile("a+2", "", 'eval') + +eval(c) +``` + + + + + 3 + + + + +```python +a = 1 +c = compile("b=a+2", "", 'exec') + +exec(c) +b +``` + + + + + 3 + + + +## abstract syntax trees + + +```python +import ast +``` + + +```python +tree = 
ast.parse("a+2", "", "eval") + +ast.dump(tree) +``` + + + + + "Expression(body=BinOp(left=Name(id='a', ctx=Load()), op=Add(), right=Num(n=2)))" + + + +改变常数的值: + + +```python +tree.body.right.n = 3 + +ast.dump(tree) +``` + + + + + "Expression(body=BinOp(left=Name(id='a', ctx=Load()), op=Add(), right=Num(n=3)))" + + + + +```python +a = 1 +c = compile(tree, '', 'eval') + +eval(c) +``` + + + + + 4 + + + +安全的使用方法 `literal_eval` ,只支持基本值的操作: + + +```python +ast.literal_eval("[10.0, 2, True, 'foo']") +``` + + + + + [10.0, 2, True, 'foo'] + + diff --git a/docs/06-matplotlib/06.01-pyplot-tutorial.md b/docs/06-matplotlib/06.01-pyplot-tutorial.md new file mode 100644 index 00000000..fcef96a9 --- /dev/null +++ b/docs/06-matplotlib/06.01-pyplot-tutorial.md @@ -0,0 +1,410 @@ + +# Pyplot 教程 + +## Matplotlib 简介 + +**`matplotlib`** 是一个 **`Python`** 的 `2D` 图形包。 + +在线文档:http://matplotlib.org ,提供了 [Examples](http://matplotlib.org/examples/index.html), [FAQ](http://matplotlib.org/faq/index.html), [API](http://matplotlib.org/contents.html), [Gallery](http://matplotlib.org/gallery.html),其中 [Gallery](http://matplotlib.org/gallery.html) 是很有用的一个部分,因为它提供了各种画图方式的可视化,方便用户根据需求进行选择。 + +## 使用 Pyplot + +导入相关的包: + + +```python +import numpy as np +import matplotlib.pyplot as plt +``` + +`matplotlib.pyplot` 包含一系列类似 **`MATLAB`** 中绘图函数的相关函数。每个 `matplotlib.pyplot` 中的函数对当前的图像进行一些修改,例如:产生新的图像,在图像中产生新的绘图区域,在绘图区域中画线,给绘图加上标记,等等…… `matplotlib.pyplot` 会自动记住当前的图像和绘图区域,因此这些函数会直接作用在当前的图像上。 + +下文中,以 `plt` 作为 `matplotlib.pyplot` 的省略。 + +## plt.show() 函数 + +默认情况下,`matplotlib.pyplot` 不会直接显示图像,只有调用 `plt.show()` 函数时,图像才会显示出来。 + +`plt.show()` 默认是在新窗口打开一幅图像,并且提供了对图像进行操作的按钮。 + +不过在 `ipython` 命令行中,我们可以使用 `magic` 命令将它插入 `notebook` 中,并且不需要调用 `plt.show()` 也可以显示: + +- `%matplotlib notebook` +- `%matplotlib inline` + +不过在实际写程序中,我们还是需要调用 `plt.show()` 函数将图像显示出来。 + +这里我们使图像输出在 `notebook` 中: + + +```python +%matplotlib inline +``` + +## plt.plot() 函数 + +### 例子 + +`plt.plot()` 函数可以用来绘图: + + +```python +plt.plot([1,2,3,4]) 
+plt.ylabel('some numbers') + +plt.show() +``` + + +![png](output_13_0.png) + + +### 基本用法 + +`plot` 函数基本的用法有以下四种: + +默认参数 +- `plt.plot(x,y)` + +指定参数 +- `plt.plot(x,y, format_str)` + +默认参数,`x` 为 `0~N-1` +- `plt.plot(y)` + +指定参数,`x` 为 `0~N-1` +- `plt.plot(y, format_str)` + +因此,在上面的例子中,我们没有给定 `x` 的值,所以其默认值为 `[0,1,2,3]`。 + +传入 `x` 和 `y`: + + +```python +plt.plot([1,2,3,4], [1,4,9,16]) +``` + + + + + [] + + + + +![png](output_16_1.png) + + +### 字符参数 + +和 **`MATLAB`** 中类似,我们还可以用字符来指定绘图的格式: + +表示颜色的字符参数有: + +字符 | 颜色 +-- | -- +`‘b’`| 蓝色,blue +`‘g’`| 绿色,green +`‘r’`| 红色,red +`‘c’`| 青色,cyan +`‘m’`| 品红,magenta +`‘y’`| 黄色,yellow +`‘k’`| 黑色,black +`‘w’`| 白色,white + +表示类型的字符参数有: + +字符|类型 | 字符|类型 +---|--- | --- | --- +` '-' `| 实线 | `'--'`| 虚线 +`'-.'`| 虚点线 | `':'`| 点线 +`'.'`| 点 | `','`| 像素点 +`'o'` |圆点 | `'v'`| 下三角点 +`'^'`| 上三角点 | `'<'`| 左三角点 +`'>'`| 右三角点 | `'1'`| 下三叉点 +`'2'`| 上三叉点 | `'3'`| 左三叉点 +`'4'`| 右三叉点 | `'s'`| 正方点 +`'p'` | 五角点 | `'*'`| 星形点 +`'h'`| 六边形点1 | `'H'`| 六边形点2 +`'+'`| 加号点 | `'x'`| 乘号点 +`'D'`| 实心菱形点 | `'d'`| 瘦菱形点 +`'_'`| 横线点 | | + +例如我们要画出红色圆点: + + +```python +plt.plot([1,2,3,4], [1,4,9,16], 'ro') +plt.show() +``` + + +![png](output_19_0.png) + + +可以看出,有两个点在图像的边缘,因此,我们需要改变轴的显示范围。 + +### 显示范围 + +与 **`MATLAB`** 类似,这里可以使用 `axis` 函数指定坐标轴显示的范围: + + plt.axis([xmin, xmax, ymin, ymax]) + + +```python +plt.plot([1,2,3,4], [1,4,9,16], 'ro') +# 指定 x 轴显示区域为 0-6,y 轴为 0-20 +plt.axis([0,6,0,20]) +plt.show() +``` + + +![png](output_23_0.png) + + +### 传入 `Numpy` 数组 + +之前我们传给 `plot` 的参数都是列表,事实上,向 `plot` 中传入 `numpy` 数组是更常用的做法。事实上,如果传入的是列表,`matplotlib` 会在内部将它转化成数组再进行处理: + + +```python +import numpy as np +import matplotlib.pyplot as plt + +# evenly sampled time at 200ms intervals +t = np.arange(0., 5., 0.2) + +# red dashes, blue squares and green triangles +plt.plot(t, t, 'r--', + t, t**2, 'bs', + t, t**3, 'g^') + +plt.show() +``` + + +![png](output_26_0.png) + + +### 传入多组数据 + +事实上,在上面的例子中,我们不仅仅向 `plot` 函数传入了数组,还传入了多组 `(x,y,format_str)` 参数,它们在同一张图上显示。 + +这意味着我们不需要使用多个 `plot` 
函数来画多组数组,只需要可以将这些组合放到一个 `plot` 函数中去即可。 + +### 线条属性 + +之前提到,我们可以用字符串来控制线条的属性,事实上还可以通过关键词来改变线条的性质,例如 `linwidth` 可以改变线条的宽度,`color` 可以改变线条的颜色: + + +```python +x = np.linspace(-np.pi,np.pi) +y = np.sin(x) + +plt.plot(x, y, linewidth=2.0, color='r') + +plt.show() +``` + + +![png](output_31_0.png) + + +### 使用 plt.plot() 的返回值来设置线条属性 + +`plot` 函数返回一个 `Line2D` 对象组成的列表,每个对象代表输入的一对组合,例如: + +- line1, line2 为两个 Line2D 对象 + + `line1, line2 = plt.plot(x1, y1, x2, y2)` + +- 返回 3 个 Line2D 对象组成的列表 + + `lines = plt.plot(x1, y1, x2, y2, x3, y3)` + +我们可以使用这个返回值来对线条属性进行设置: + + +```python +# 加逗号 line 中得到的是 line2D 对象,不加逗号得到的是只有一个 line2D 对象的列表 +line, = plt.plot(x, y, 'r-') + +# 将抗锯齿关闭 +line.set_antialiased(False) + +plt.show() +``` + + +![png](output_34_0.png) + + +### plt.setp() 修改线条性质 + +更方便的做法是使用 `plt` 的 `setp` 函数: + + +```python +lines = plt.plot(x, y) + +# 使用键值对 +plt.setp(lines, color='r', linewidth=2.0) + +# 或者使用 MATLAB 风格的字符串对 +plt.setp(lines, 'color', 'r', 'linewidth', 2.0) + +plt.show() +``` + + +![png](output_37_0.png) + + +可以设置的属性有很多,可以使用 `plt.setp(lines)` 查看 `lines` 可以设置的属性,各属性的含义可参考 `matplotlib` 的文档。 + + +```python +plt.setp(lines) +``` + + agg_filter: unknown + alpha: float (0.0 transparent through 1.0 opaque) + animated: [True | False] + antialiased or aa: [True | False] + axes: an :class:`~matplotlib.axes.Axes` instance + clip_box: a :class:`matplotlib.transforms.Bbox` instance + clip_on: [True | False] + clip_path: [ (:class:`~matplotlib.path.Path`, :class:`~matplotlib.transforms.Transform`) | :class:`~matplotlib.patches.Patch` | None ] + color or c: any matplotlib color + contains: a callable function + dash_capstyle: ['butt' | 'round' | 'projecting'] + dash_joinstyle: ['miter' | 'round' | 'bevel'] + dashes: sequence of on/off ink in points + drawstyle: ['default' | 'steps' | 'steps-pre' | 'steps-mid' | 'steps-post'] + figure: a :class:`matplotlib.figure.Figure` instance + fillstyle: ['full' | 'left' | 'right' | 'bottom' | 'top' | 'none'] + gid: an id string + label: 
string or anything printable with '%s' conversion. + linestyle or ls: [``'-'`` | ``'--'`` | ``'-.'`` | ``':'`` | ``'None'`` | ``' '`` | ``''``] + linewidth or lw: float value in points + lod: [True | False] + marker: :mod:`A valid marker style ` + markeredgecolor or mec: any matplotlib color + markeredgewidth or mew: float value in points + markerfacecolor or mfc: any matplotlib color + markerfacecoloralt or mfcalt: any matplotlib color + markersize or ms: float + markevery: [None | int | length-2 tuple of int | slice | list/array of int | float | length-2 tuple of float] + path_effects: unknown + picker: float distance in points or callable pick function ``fn(artist, event)`` + pickradius: float distance in points + rasterized: [True | False | None] + sketch_params: unknown + snap: unknown + solid_capstyle: ['butt' | 'round' | 'projecting'] + solid_joinstyle: ['miter' | 'round' | 'bevel'] + transform: a :class:`matplotlib.transforms.Transform` instance + url: a url string + visible: [True | False] + xdata: 1D array + ydata: 1D array + zorder: any number + + +## 子图 + +`figure()` 函数会产生一个指定编号为 `num` 的图: + + plt.figure(num) + +这里,`figure(1)` 其实是可以省略的,因为默认情况下 `plt` 会自动产生一幅图像。 + +使用 `subplot` 可以在一副图中生成多个子图,其参数为: + + plt.subplot(numrows, numcols, fignum) + +当 `numrows * numcols < 10` 时,中间的逗号可以省略,因此 `plt.subplot(211)` 就相当于 `plt.subplot(2,1,1)`。 + + +```python +def f(t): + return np.exp(-t) * np.cos(2*np.pi*t) + +t1 = np.arange(0.0, 5.0, 0.1) +t2 = np.arange(0.0, 5.0, 0.02) + +plt.figure(1) +plt.subplot(211) +plt.plot(t1, f(t1), 'bo', t2, f(t2), 'k') + +plt.subplot(212) +plt.plot(t2, np.cos(2*np.pi*t2), 'r--') +plt.show() +``` + + +![png](output_42_0.png) + + +## 图形上加上文字 + +`plt.hist()` 可以用来画直方图。 + + +```python +mu, sigma = 100, 15 +x = mu + sigma * np.random.randn(10000) + +# the histogram of the data +n, bins, patches = plt.hist(x, 50, normed=1, facecolor='g', alpha=0.75) + + +plt.xlabel('Smarts') +plt.ylabel('Probability') +plt.title('Histogram of IQ') +plt.text(60, 
.025, r'$\mu=100,\ \sigma=15$') +plt.axis([40, 160, 0, 0.03]) +plt.grid(True) +plt.show() +``` + + +![png](output_45_0.png) + + +对于这幅图形,我们使用 `xlabel` ,`ylabel`,`title`,`text` 方法设置了文字,其中: + +- `xlabel` :x 轴标注 + +- `ylabel` :y 轴标注 + +- `title` :图形标题 + +- `text` :在指定位置放入文字 + +输入特殊符号支持使用 `Tex` 语法,用 `$$` 隔开。 + +除了使用 `text` 在指定位置标上文字之外,还可以使用 `annotate` 函数进行注释,`annotate` 主要有两个参数: + +- `xy` :注释位置 +- `xytext` :注释文字位置 + + +```python +ax = plt.subplot(111) + +t = np.arange(0.0, 5.0, 0.01) +s = np.cos(2*np.pi*t) +line, = plt.plot(t, s, lw=2) + +plt.annotate('local max', xy=(2, 1), xytext=(3, 1.5), + arrowprops=dict(facecolor='black', shrink=0.05), + ) + +plt.ylim(-2,2) +plt.show() +``` + + +![png](output_47_0.png) + diff --git a/docs/06-matplotlib/06.02-customizing-plots-with-style-sheets.md b/docs/06-matplotlib/06.02-customizing-plots-with-style-sheets.md new file mode 100644 index 00000000..449c9861 --- /dev/null +++ b/docs/06-matplotlib/06.02-customizing-plots-with-style-sheets.md @@ -0,0 +1,128 @@ + +# 使用 style 来配置 pyplot 风格 + + +```python +import matplotlib.pyplot as plt +import numpy as np + +%matplotlib inline +``` + +`style` 是 `pyplot` 的一个子模块,方便进行风格转换, `pyplot` 有很多的预设风格,可以使用 `plt.style.available` 来查看: + + +```python +plt.style.available +``` + + + + + [u'dark_background', u'bmh', u'grayscale', u'ggplot', u'fivethirtyeight'] + + + + +```python +x = np.linspace(0, 2 * np.pi) +y = np.sin(x) + +plt.plot(x, y) + +plt.show() +``` + + +![png](output_4_0.png) + + +例如,我们可以模仿 `R` 语言中常用的 `ggplot` 风格: + + +```python +plt.style.use('ggplot') + +plt.plot(x, y) + +plt.show() +``` + + +![png](output_6_0.png) + + +有时候,我们不希望改变全局的风格,只是想暂时改变一下风格,则可以使用 `context` 将风格改变限制在某一个代码块内: + + +```python +with plt.style.context(('dark_background')): + plt.plot(x, y, 'r-o') + plt.show() +``` + + +![png](output_8_0.png) + + +在代码块外绘图则仍然是全局的风格。 + + +```python +with plt.style.context(('dark_background')): + pass +plt.plot(x, y, 'r-o') +plt.show() +``` + + +![png](output_10_0.png) + + 
+还可以混搭使用多种风格,不过最右边的一种风格会将最左边的覆盖: + + +```python +plt.style.use(['dark_background', 'ggplot']) + +plt.plot(x, y, 'r-o') +plt.show() +``` + + +![png](output_12_0.png) + + +事实上,我们还可以自定义风格文件。 + +自定义文件需要放在 `matplotlib` 的配置文件夹 `mpl_configdir` 的子文件夹 `mpl_configdir/stylelib/` 下,以 `.mplstyle` 结尾。 + +`mpl_configdir` 的位置可以这样查看: + + +```python +import matplotlib +matplotlib.get_configdir() +``` + + + + + u'c:/Users/Jin\\.matplotlib' + + + +里面的内容以 `属性:值` 的形式保存: + +``` +axes.titlesize : 24 +axes.labelsize : 20 +lines.linewidth : 3 +lines.markersize : 10 +xtick.labelsize : 16 +ytick.labelsize : 16 +``` + +假设我们将其保存为 `mpl_configdir/stylelib/presentation.mplstyle`,那么使用这个风格的时候只需要调用: + + plt.style.use('presentation') diff --git a/docs/06-matplotlib/06.03-working-with-text---basic.md b/docs/06-matplotlib/06.03-working-with-text---basic.md new file mode 100644 index 00000000..c48e985c --- /dev/null +++ b/docs/06-matplotlib/06.03-working-with-text---basic.md @@ -0,0 +1,306 @@ + +# 处理文本(基础) + + +```python +import matplotlib.pyplot as plt +import numpy as np +%matplotlib inline +``` + +`matplotlib` 对文本的支持十分完善,包括数学公式,`Unicode` 文字,栅格和向量化输出,文字换行,文字旋转等一系列操作。 + +## 基础文本函数 + +在 `matplotlib.pyplot` 中,基础的文本函数如下: + +- `text()` 在 `Axes` 对象的任意位置添加文本 +- `xlabel()` 添加 x 轴标题 +- `ylabel()` 添加 y 轴标题 +- `title()` 给 `Axes` 对象添加标题 +- `figtext()` 在 `Figure` 对象的任意位置添加文本 +- `suptitle()` 给 `Figure` 对象添加标题 +- `annotate()` 给 `Axes` 对象添加注释(可选择是否添加箭头标记) + + +```python +# -*- coding: utf-8 -*- +import matplotlib.pyplot as plt +%matplotlib inline + +# plt.figure() 返回一个 Figure() 对象 +fig = plt.figure(figsize=(12, 9)) + +# 设置这个 Figure 对象的标题 +# 事实上,如果我们直接调用 plt.suptitle() 函数,它会自动找到当前的 Figure 对象 +fig.suptitle('bold figure suptitle', fontsize=14, fontweight='bold') + +# Axes 对象表示 Figure 对象中的子图 +# 这里只有一幅图像,所以使用 add_subplot(111) +ax = fig.add_subplot(111) +fig.subplots_adjust(top=0.85) + +# 可以直接使用 set_xxx 的方法来设置标题 +ax.set_title('axes title') +# 也可以直接调用 title(),因为会自动定位到当前的 Axes 对象 +# plt.title('axes title') + 
+ax.set_xlabel('xlabel') +ax.set_ylabel('ylabel') + +# 添加文本,斜体加文本框 +ax.text(3, 8, 'boxed italics text in data coords', style='italic', + bbox={'facecolor':'red', 'alpha':0.5, 'pad':10}) + +# 数学公式,用 $$ 输入 Tex 公式 +ax.text(2, 6, r'an equation: $E=mc^2$', fontsize=15) + +# Unicode 支持 +ax.text(3, 2, unicode('unicode: Institut f\374r Festk\366rperphysik', 'latin-1')) + +# 颜色,对齐方式 +ax.text(0.95, 0.01, 'colored text in axes coords', + verticalalignment='bottom', horizontalalignment='right', + transform=ax.transAxes, + color='green', fontsize=15) + +# 注释文本和箭头 +ax.plot([2], [1], 'o') +ax.annotate('annotate', xy=(2, 1), xytext=(3, 4), + arrowprops=dict(facecolor='black', shrink=0.05)) + +# 设置显示范围 +ax.axis([0, 10, 0, 10]) + +plt.show() +``` + + +![png](output_5_0.png) + + +## 文本属性和布局 + +我们可以通过下列关键词,在文本函数中设置文本的属性: + +关键词|值 +---|--- +alpha | float +backgroundcolor | any matplotlib color +bbox | rectangle prop dict plus key ``'pad'`` which is a pad in points +clip_box | a matplotlib.transform.Bbox instance +clip_on | [True , False] +clip_path | a Path instance and a Transform instance, a Patch +color | any matplotlib color +family | [ ``'serif'`` , ``'sans-serif'`` , ``'cursive'`` , ``'fantasy'`` , ``'monospace'`` ] +fontproperties | a matplotlib.font_manager.FontProperties instance +horizontalalignment or ha | [ ``'center'`` , ``'right'`` , ``'left'`` ] +label | any string +linespacing | float +multialignment | [``'left'`` , ``'right'`` , ``'center'`` ] +name or fontname | string e.g., [``'Sans'`` , ``'Courier'`` , ``'Helvetica'`` ...] 
+picker | [None,float,boolean,callable] +position | (x,y) +rotation | [ angle in degrees , ``'vertical'`` , ``'horizontal'`` ] +size or fontsize | [ size in points , relative size, e.g., ``'smaller'``, ``'x-large'`` ] +style or fontstyle | [ ``'normal'`` , ``'italic'`` , ``'oblique'``] +text | string or anything printable with '%s' conversion +transform | a matplotlib.transform transformation instance +variant | [ ``'normal'`` , ``'small-caps'`` ] +verticalalignment or va | [ ``'center'`` , ``'top'`` , ``'bottom'`` , ``'baseline'`` ] +visible | [True , False] +weight or fontweight | [ ``'normal'`` , ``'bold'`` , ``'heavy'`` , ``'light'`` , ``'ultrabold'`` , ``'ultralight'``] +x | float +y | float +zorder | any number + +其中 `va`, `ha`, `multialignment` 可以用来控制布局。 +- `horizontalalignment` or `ha` :x 位置参数表示的位置 +- `verticalalignment` or `va`:y 位置参数表示的位置 +- `multialignment`:多行位置控制 + + +```python +import matplotlib.pyplot as plt +import matplotlib.patches as patches + +# build a rectangle in axes coords +left, width = .25, .5 +bottom, height = .25, .5 +right = left + width +top = bottom + height + +fig = plt.figure(figsize=(10,7)) +ax = fig.add_axes([0,0,1,1]) + +# axes coordinates are 0,0 is bottom left and 1,1 is upper right +p = patches.Rectangle( + (left, bottom), width, height, + fill=False, transform=ax.transAxes, clip_on=False + ) + +ax.add_patch(p) + +ax.text(left, bottom, 'left top', + horizontalalignment='left', + verticalalignment='top', + transform=ax.transAxes, + size='xx-large') + +ax.text(left, bottom, 'left bottom', + horizontalalignment='left', + verticalalignment='bottom', + transform=ax.transAxes, + size='xx-large') + +ax.text(right, top, 'right bottom', + horizontalalignment='right', + verticalalignment='bottom', + transform=ax.transAxes, + size='xx-large') + +ax.text(right, top, 'right top', + horizontalalignment='right', + verticalalignment='top', + transform=ax.transAxes, + size='xx-large') + +ax.text(right, bottom, 'center top', + 
horizontalalignment='center', + verticalalignment='top', + transform=ax.transAxes, + size='xx-large') + +ax.text(left, 0.5*(bottom+top), 'right center', + horizontalalignment='right', + verticalalignment='center', + rotation='vertical', + transform=ax.transAxes, + size='xx-large') + +ax.text(left, 0.5*(bottom+top), 'left center', + horizontalalignment='left', + verticalalignment='center', + rotation='vertical', + transform=ax.transAxes, + size='xx-large') + +ax.text(0.5*(left+right), 0.5*(bottom+top), 'middle', + horizontalalignment='center', + verticalalignment='center', + fontsize=20, color='red', + transform=ax.transAxes) + +ax.text(right, 0.5*(bottom+top), 'centered', + horizontalalignment='center', + verticalalignment='center', + rotation='vertical', + transform=ax.transAxes, + size='xx-large') + +ax.text(left, top, 'rotated\nwith newlines', + horizontalalignment='center', + verticalalignment='center', + rotation=45, + transform=ax.transAxes, + size='xx-large') + +ax.set_axis_off() +plt.show() +``` + + +![png](output_8_0.png) + + +## 注释文本 + +`text()` 函数在 Axes 对象的指定位置添加文本,而 `annotate()` 则是对某一点添加注释文本,需要考虑两个位置:一是注释点的坐标 `xy` ,二是注释文本的位置坐标 `xytext`: + + +```python +fig = plt.figure() +ax = fig.add_subplot(111) + +t = np.arange(0.0, 5.0, 0.01) +s = np.cos(2*np.pi*t) +line, = ax.plot(t, s, lw=2) + +ax.annotate('local max', xy=(2, 1), xytext=(3, 1.5), + arrowprops=dict(facecolor='black', shrink=0.05), + ) + +ax.set_ylim(-2,2) +plt.show() +``` + + +![png](output_11_0.png) + + +在上面的例子中,两个坐标使用的都是原始数据的坐标系,不过我们还可以通过 `xycoords` 和 `textcoords` 来设置坐标系(默认是 `'data'`): + +参数|坐标系 +--|-- +‘figure points’| points from the lower left corner of the figure +‘figure pixels’| pixels from the lower left corner of the figure +‘figure fraction’| 0,0 is lower left of figure and 1,1 is upper right +‘axes points’| points from lower left corner of axes +‘axes pixels’| pixels from lower left corner of axes +‘axes fraction’| 0,0 is lower left of axes and 1,1 is upper right +‘data’| use the axes 
data coordinate system + +使用一个不同的坐标系: + + +```python +fig = plt.figure() +ax = fig.add_subplot(111) + +t = np.arange(0.0, 5.0, 0.01) +s = np.cos(2*np.pi*t) +line, = ax.plot(t, s, lw=2) + +ax.annotate('local max', xy=(3, 1), xycoords='data', + xytext=(0.8, 0.95), textcoords='axes fraction', + arrowprops=dict(facecolor='black', shrink=0.05), + horizontalalignment='right', verticalalignment='top', + ) + +ax.set_ylim(-2,2) +plt.show() +``` + + +![png](output_13_0.png) + + +## 极坐标系注释文本 + +产生极坐标系需要在 `subplot` 的参数中设置 `polar=True`: + + +```python +fig = plt.figure() +ax = fig.add_subplot(111, polar=True) +r = np.arange(0,1,0.001) +theta = 2*2*np.pi*r +line, = ax.plot(theta, r, color='#ee8d18', lw=3) + +ind = 800 +thisr, thistheta = r[ind], theta[ind] +ax.plot([thistheta], [thisr], 'o') +ax.annotate('a polar annotation', + xy=(thistheta, thisr), # theta, radius + xytext=(0.05, 0.05), # fraction, fraction + textcoords='figure fraction', + arrowprops=dict(facecolor='black', shrink=0.05), + horizontalalignment='left', + verticalalignment='bottom', + ) +plt.show() +``` + + +![png](output_16_0.png) + diff --git a/docs/06-matplotlib/06.04-working-with-text---math-expression.md b/docs/06-matplotlib/06.04-working-with-text---math-expression.md new file mode 100644 index 00000000..f9061443 --- /dev/null +++ b/docs/06-matplotlib/06.04-working-with-text---math-expression.md @@ -0,0 +1,149 @@ + +# 处理文本(数学表达式) + +在字符串中使用一对 `$$` 符号可以利用 `Tex` 语法打出数学表达式,而且并不需要预先安装 `Tex`。在使用时我们通常加上 `r` 标记表示它是一个原始字符串(raw string) + + +```python +import matplotlib.pyplot as plt +import numpy as np +%matplotlib inline +``` + + +```python +# plain text +plt.title('alpha > beta') + +plt.show() +``` + + +![png](output_3_0.png) + + + +```python +# math text +plt.title(r'$\alpha > \beta$') + +plt.show() +``` + + +![png](output_4_0.png) + + +## 上下标 + +使用 `_` 和 `^` 表示上下标: + +$\alpha_i > \beta_i$: + + r'$\alpha_i > \beta_i$' + +$\sum\limits_{i=0}^\infty x_i$: + + r'$\sum_{i=0}^\infty x_i$' + +注: + +- 希腊字母和特殊符号可以用 '\ + 
对应的名字' 来显示 +- `{}` 中的内容属于一个部分;要打出花括号时需要使用 `\{\}` + +## 分数,二项式系数,stacked numbers + +$\frac{3}{4}, \binom{3}{4}, \stackrel{3}{4}$: + + r'$\frac{3}{4}, \binom{3}{4}, \stackrel{3}{4}$' + +$\frac{5 - \frac{1}{x}}{4}$: + + r'$\frac{5 - \frac{1}{x}}{4}$' + +在 Tex 语言中,括号始终是默认的大小,如果要使括号大小与括号内部的大小对应,可以使用 `\left` 和 `\right` 选项: + +$(\frac{5 - \frac{1}{x}}{4})$ + + r'$(\frac{5 - \frac{1}{x}}{4})$' + +$\left(\frac{5 - \frac{1}{x}}{4}\right)$: + + r'$\left(\frac{5 - \frac{1}{x}}{4}\right)$' + +## 根号 + +$\sqrt{2}$: + + r'$\sqrt{2}$' + +$\sqrt[3]{x}$: + + r'$\sqrt[3]{x}$' + +## 特殊字体 + +默认显示的字体是斜体,不过可以使用以下方法显示不同的字体: + +命令|显示 +--|-- +\mathrm{Roman}|$\mathrm{Roman}$ +\mathit{Italic}|$\mathit{Italic}$ +\mathtt{Typewriter}|$\mathtt{Typewriter}$ +\mathcal{CALLIGRAPHY}|$\mathcal{CALLIGRAPHY}$ +\mathbb{blackboard}|$\mathbb{blackboard}$ +\mathfrak{Fraktur}|$\mathfrak{Fraktur}$ +\mathsf{sansserif}|$\mathsf{sansserif}$ + +$s(t) = \mathcal{A}\ \sin(2 \omega t)$: + + r'$s(t) = \mathcal{A}\ \sin(2 \omega t)$' + +注: + +- Tex 语法默认忽略空格,要打出空格使用 `'\ '` +- \sin 默认显示为 Roman 字体 + +## 音调 + +命令|结果 +--|-- +`\acute a`| $\acute a$ +`\bar a`| $\bar a$ +`\breve a` | $\breve a$ +`\ddot a`| $\ddot a$ +`\dot a` | $\dot a$ +`\grave a`| $\grave a$ +`\hat a`| $\hat a$ +`\tilde a` | $\tilde a$ +`\vec a` | $\vec a$ +`\overline{abc}`|$\overline{abc}$ +`\widehat{xyz}`|$\widehat{xyz}$ +`\widetilde{xyz}`|$\widetilde{xyz}$ + +## 特殊字符表 + +参见:http://matplotlib.org/users/mathtext.html#symbols + +## 例子 + + +```python +import numpy as np +import matplotlib.pyplot as plt +t = np.arange(0.0, 2.0, 0.01) +s = np.sin(2*np.pi*t) + +plt.plot(t,s) +plt.title(r'$\alpha_i > \beta_i$', fontsize=20) +plt.text(1, -0.6, r'$\sum_{i=0}^\infty x_i$', fontsize=20) +plt.text(0.6, 0.6, r'$\mathcal{A}\ \mathrm{sin}(2 \omega t)$', + fontsize=20) +plt.xlabel('time (s)') +plt.ylabel('volts (mV)') +plt.show() +``` + + +![png](output_18_0.png) + diff --git a/docs/06-matplotlib/06.05-image-tutorial.md b/docs/06-matplotlib/06.05-image-tutorial.md new file 
mode 100644 index 00000000..35790e0c --- /dev/null +++ b/docs/06-matplotlib/06.05-image-tutorial.md @@ -0,0 +1,177 @@ + +# 图像基础 + +导入相应的包: + + +```python +import matplotlib.pyplot as plt +import matplotlib.image as mpimg +import numpy as np +%matplotlib inline +``` + +![臭虫](stinkbug.png) + +## 导入图像 + +我们首先导入上面的图像,注意 `matplotlib` 默认只支持 `PNG` 格式的图像,我们可以使用 `mpimg.imread` 方法读入这幅图像: + + +```python +img = mpimg.imread('stinkbug.png') +``` + + +```python +img.shape +``` + + + + + (375L, 500L, 3L) + + + +这是一个 `375 x 500 x 3` 的 `RGB` 图像,并且每个像素使用 uint8 分别表示 `RGB` 三个通道的值。不过在处理的时候,`matplotlib` 将它们的值归一化到 `0.0~1.0` 之间: + + +```python +img.dtype +``` + + + + + dtype('float32') + + + +## 显示图像 + +使用 `plt.imshow()` 可以显示图像: + + +```python +imgplot = plt.imshow(img) +``` + + +![png](output_12_0.png) + + +## 伪彩色图像 + +从单通道模拟彩色图像: + + +```python +lum_img = img[:,:,0] +imgplot = plt.imshow(lum_img) +``` + + +![png](output_15_0.png) + + +## 改变 colormap + + +```python +imgplot = plt.imshow(lum_img) +imgplot.set_cmap('hot') +``` + + +![png](output_17_0.png) + + + +```python +imgplot = plt.imshow(lum_img) +imgplot.set_cmap('spectral') +``` + + +![png](output_18_0.png) + + +显示色度条: + + +```python +imgplot = plt.imshow(lum_img) +imgplot.set_cmap('spectral') +plt.colorbar() +plt.show() +``` + + +![png](output_20_0.png) + + +## 限制显示范围 + +先查看直方图: + + +```python +plt.hist(lum_img.flatten(), 256, range=(0.0,1.0), fc='k', ec='k') +plt.show() +``` + + +![png](output_23_0.png) + + +将显示范围设为 `0.0-0.7`: + + +```python +imgplot = plt.imshow(lum_img) +imgplot.set_clim(0.0,0.7) +``` + + +![png](output_25_0.png) + + +## resize 操作 + + +```python +from PIL import Image +img = Image.open('stinkbug.png') +rsize = img.resize((img.size[0]/10,img.size[1]/10)) +rsizeArr = np.asarray(rsize) +imgplot = plt.imshow(rsizeArr) +``` + + +![png](output_27_0.png) + + +上面我们将这个图像使用 PIL 的 `Image` 对象导入,并将其 `resize` 为原来的 1/100,可以看到很多细节都丢失了。 + +在画图时,由于画面的大小与实际像素的大小可能不一致,所以不一致的地方会进行插值处理,尝试一下不同的插值方法: + + +```python +imgplot = 
plt.imshow(rsizeArr) +imgplot.set_interpolation('nearest') +``` + + +![png](output_29_0.png) + + + +```python +imgplot = plt.imshow(rsizeArr) +imgplot.set_interpolation('bicubic') +``` + + +![png](output_30_0.png) + diff --git a/docs/06-matplotlib/06.06-annotating-axes.md b/docs/06-matplotlib/06.06-annotating-axes.md new file mode 100644 index 00000000..e30ac80a --- /dev/null +++ b/docs/06-matplotlib/06.06-annotating-axes.md @@ -0,0 +1,452 @@ + +# 注释 + +## 使用文本框进行注释 + +先看一个简单的例子: + + +```python +import numpy.random +import matplotlib.pyplot as plt +%matplotlib inline + +fig = plt.figure(1, figsize=(5,5)) +fig.clf() + +ax = fig.add_subplot(111) +ax.set_aspect(1) + +x1 = -1 + numpy.random.randn(100) +y1 = -1 + numpy.random.randn(100) +x2 = 1. + numpy.random.randn(100) +y2 = 1. + numpy.random.randn(100) + +ax.scatter(x1, y1, color="r") +ax.scatter(x2, y2, color="g") + +# 加上两个文本框 +bbox_props = dict(boxstyle="round", fc="w", ec="0.5", alpha=0.9) +ax.text(-2, -2, "Sample A", ha="center", va="center", size=20, + bbox=bbox_props) +ax.text(2, 2, "Sample B", ha="center", va="center", size=20, + bbox=bbox_props) + +# 加上一个箭头文本框 +bbox_props = dict(boxstyle="rarrow", fc=(0.8,0.9,0.9), ec="b", lw=2) +t = ax.text(0, 0, "Direction", ha="center", va="center", rotation=45, + size=15, + bbox=bbox_props) + +bb = t.get_bbox_patch() +bb.set_boxstyle("rarrow", pad=0.6) + +ax.set_xlim(-4, 4) +ax.set_ylim(-4, 4) + +plt.show() +``` + + +![png](output_3_0.png) + + +`text()` 函数接受 `bbox` 参数来绘制文本框。 +```python +bbox_props = dict(boxstyle="rarrow,pad=0.3", fc="cyan", ec="b", lw=2) +t = ax.text(0, 0, "Direction", ha="center", va="center", rotation=45, + size=15, + bbox=bbox_props) +``` + +可以这样来获取这个文本框,并对其参数进行修改: +```python +bb = t.get_bbox_patch() +bb.set_boxstyle("rarrow", pad=0.6) +``` + +可用的文本框风格有: + +class|name|attrs +---|---|--- +LArrow |larrow |pad=0.3 +RArrow |rarrow |pad=0.3 +Round |round |pad=0.3,rounding_size=None +Round4 |round4 |pad=0.3,rounding_size=None +Roundtooth |roundtooth 
|pad=0.3,tooth_size=None +Sawtooth |sawtooth |pad=0.3,tooth_size=None +Square |square |pad=0.3 + + +```python +import matplotlib.patches as mpatch +import matplotlib.pyplot as plt + +styles = mpatch.BoxStyle.get_styles() + +figheight = (len(styles)+.5) +fig1 = plt.figure(figsize=(4/1.5, figheight/1.5)) +fontsize = 0.3 * 72 +ax = fig1.add_subplot(111) + +for i, (stylename, styleclass) in enumerate(styles.items()): + ax.text(0.5, (float(len(styles)) - 0.5 - i)/figheight, stylename, + ha="center", + size=fontsize, + transform=fig1.transFigure, + bbox=dict(boxstyle=stylename, fc="w", ec="k")) + +# 去掉轴的显示 +ax.spines['right'].set_color('none') +ax.spines['top'].set_color('none') +ax.spines['left'].set_color('none') +ax.spines['bottom'].set_color('none') +plt.xticks([]) +plt.yticks([]) + +plt.show() +``` + + +![png](output_5_0.png) + + +各个风格的文本框如上图所示。 + +## 使用箭头进行注释 + + +```python +plt.figure(1, figsize=(3,3)) +ax = plt.subplot(111) + +ax.annotate("", + xy=(0.2, 0.2), xycoords='data', + xytext=(0.8, 0.8), textcoords='data', + arrowprops=dict(arrowstyle="->", + connectionstyle="arc3"), + ) + +plt.show() +``` + + +![png](output_8_0.png) + + +之前介绍了 `annotate` 中 `xy, xycoords, xytext, textcoords` 参数的含义,通常我们把 `xy` 设在 `data` 坐标系,把 `xytext` 设在 `offset` 即以注释点为原点的参考系。 + +箭头显示是可选的,用 `arrowprops` 参数来指定,接受一个字典作为参数。 + +不同类型的绘制箭头方式: + + +```python +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches + +x1, y1 = 0.3, 0.3 +x2, y2 = 0.7, 0.7 + +fig = plt.figure(1, figsize=(8,3)) +fig.clf() +from mpl_toolkits.axes_grid.axes_grid import AxesGrid +from mpl_toolkits.axes_grid.anchored_artists import AnchoredText + +#from matplotlib.font_manager import FontProperties + +def add_at(ax, t, loc=2): + fp = dict(size=10) + _at = AnchoredText(t, loc=loc, prop=fp) + ax.add_artist(_at) + return _at + + +grid = AxesGrid(fig, 111, (1, 4), label_mode="1", share_all=True) + +grid[0].set_autoscale_on(False) + +ax = grid[0] +ax.plot([x1, x2], [y1, y2], ".") +el = mpatches.Ellipse((x1, 
y1), 0.3, 0.4, angle=30, alpha=0.2) +ax.add_artist(el) +ax.annotate("", + xy=(x1, y1), xycoords='data', + xytext=(x2, y2), textcoords='data', + arrowprops=dict(arrowstyle="-", #linestyle="dashed", + color="0.5", + patchB=None, + shrinkB=0, + connectionstyle="arc3,rad=0.3", + ), + ) + +add_at(ax, "connect", loc=2) + +ax = grid[1] +ax.plot([x1, x2], [y1, y2], ".") +el = mpatches.Ellipse((x1, y1), 0.3, 0.4, angle=30, alpha=0.2) +ax.add_artist(el) +ax.annotate("", + xy=(x1, y1), xycoords='data', + xytext=(x2, y2), textcoords='data', + arrowprops=dict(arrowstyle="-", #linestyle="dashed", + color="0.5", + patchB=el, + shrinkB=0, + connectionstyle="arc3,rad=0.3", + ), + ) + +add_at(ax, "clip", loc=2) + + +ax = grid[2] +ax.plot([x1, x2], [y1, y2], ".") +el = mpatches.Ellipse((x1, y1), 0.3, 0.4, angle=30, alpha=0.2) +ax.add_artist(el) +ax.annotate("", + xy=(x1, y1), xycoords='data', + xytext=(x2, y2), textcoords='data', + arrowprops=dict(arrowstyle="-", #linestyle="dashed", + color="0.5", + patchB=el, + shrinkB=5, + connectionstyle="arc3,rad=0.3", + ), + ) + +add_at(ax, "shrink", loc=2) + + +ax = grid[3] +ax.plot([x1, x2], [y1, y2], ".") +el = mpatches.Ellipse((x1, y1), 0.3, 0.4, angle=30, alpha=0.2) +ax.add_artist(el) +ax.annotate("", + xy=(x1, y1), xycoords='data', + xytext=(x2, y2), textcoords='data', + arrowprops=dict(arrowstyle="fancy", #linestyle="dashed", + color="0.5", + patchB=el, + shrinkB=5, + connectionstyle="arc3,rad=0.3", + ), + ) + +add_at(ax, "mutate", loc=2) + +grid[0].set_xlim(0, 1) +grid[0].set_ylim(0, 1) +grid[0].axis["bottom"].toggle(ticklabels=False) +grid[0].axis["left"].toggle(ticklabels=False) +fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95) + +plt.draw() +plt.show() +``` + + +![png](output_10_0.png) + + +字典中,`connectionstyle` 参数控制路径的风格: + +Name | Attr +----|---- +angle| angleA=90,angleB=0,rad=0.0 +angle3| angleA=90,angleB=0 +arc| angleA=0,angleB=0,armA=None,armB=None,rad=0.0 +arc3| rad=0.0 +bar| 
armA=0.0,armB=0.0,fraction=0.3,angle=None + + +```python +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches + +fig = plt.figure(1, figsize=(8,5)) +fig.clf() +from mpl_toolkits.axes_grid.axes_grid import AxesGrid +from mpl_toolkits.axes_grid.anchored_artists import AnchoredText + +#from matplotlib.font_manager import FontProperties + +def add_at(ax, t, loc=2): + fp = dict(size=8) + _at = AnchoredText(t, loc=loc, prop=fp) + ax.add_artist(_at) + return _at + + +grid = AxesGrid(fig, 111, (3, 5), label_mode="1", share_all=True) + +grid[0].set_autoscale_on(False) + + +x1, y1 = 0.3, 0.3 +x2, y2 = 0.7, 0.7 + + +def demo_con_style(ax, connectionstyle, label=None): + + if label is None: + label = connectionstyle + + x1, y1 = 0.3, 0.2 + x2, y2 = 0.8, 0.6 + + ax.plot([x1, x2], [y1, y2], ".") + ax.annotate("", + xy=(x1, y1), xycoords='data', + xytext=(x2, y2), textcoords='data', + arrowprops=dict(arrowstyle="->", #linestyle="dashed", + color="0.5", + shrinkA=5, shrinkB=5, + patchA=None, + patchB=None, + connectionstyle=connectionstyle, + ), + ) + + add_at(ax, label, loc=2) + +column = grid.axes_column[0] + +demo_con_style(column[0], "angle3,angleA=90,angleB=0", + label="angle3,\nangleA=90,\nangleB=0") +demo_con_style(column[1], "angle3,angleA=0,angleB=90", + label="angle3,\nangleA=0,\nangleB=90") + + + +column = grid.axes_column[1] + +demo_con_style(column[0], "arc3,rad=0.") +demo_con_style(column[1], "arc3,rad=0.3") +demo_con_style(column[2], "arc3,rad=-0.3") + + + +column = grid.axes_column[2] + +demo_con_style(column[0], "angle,angleA=-90,angleB=180,rad=0", + label="angle,\nangleA=-90,\nangleB=180,\nrad=0") +demo_con_style(column[1], "angle,angleA=-90,angleB=180,rad=5", + label="angle,\nangleA=-90,\nangleB=180,\nrad=5") +demo_con_style(column[2], "angle,angleA=-90,angleB=10,rad=5", + label="angle,\nangleA=-90,\nangleB=10,\nrad=0") + + +column = grid.axes_column[3] + +demo_con_style(column[0], "arc,angleA=-90,angleB=0,armA=30,armB=30,rad=0", + 
label="arc,\nangleA=-90,\nangleB=0,\narmA=30,\narmB=30,\nrad=0") +demo_con_style(column[1], "arc,angleA=-90,angleB=0,armA=30,armB=30,rad=5", + label="arc,\nangleA=-90,\nangleB=0,\narmA=30,\narmB=30,\nrad=5") +demo_con_style(column[2], "arc,angleA=-90,angleB=0,armA=0,armB=40,rad=0", + label="arc,\nangleA=-90,\nangleB=0,\narmA=0,\narmB=40,\nrad=0") + + +column = grid.axes_column[4] + +demo_con_style(column[0], "bar,fraction=0.3", + label="bar,\nfraction=0.3") +demo_con_style(column[1], "bar,fraction=-0.3", + label="bar,\nfraction=-0.3") +demo_con_style(column[2], "bar,angle=180,fraction=-0.2", + label="bar,\nangle=180,\nfraction=-0.2") + + +#demo_con_style(column[1], "arc3,rad=0.3") +#demo_con_style(column[2], "arc3,rad=-0.3") + + +grid[0].set_xlim(0, 1) +grid[0].set_ylim(0, 1) +grid.axes_llc.axis["bottom"].toggle(ticklabels=False) +grid.axes_llc.axis["left"].toggle(ticklabels=False) +fig.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95) + +plt.draw() +plt.show() +``` + + +![png](output_12_0.png) + + +`arrowstyle` 参数控制小箭头的风格: + +Name | Attrs +--- |--- +`-` |None +`->` |head_length=0.4,head_width=0.2 +`-[` |widthB=1.0,lengthB=0.2,angleB=None +¦`-`¦ |widthA=1.0,widthB=1.0 +`-`¦`>` |head_length=0.4,head_width=0.2 +`<-` |head_length=0.4,head_width=0.2 +`<->` |head_length=0.4,head_width=0.2 +`<`¦`-` |head_length=0.4,head_width=0.2 +`<`¦-¦`>` |head_length=0.4,head_width=0.2 +`fancy` |head_length=0.4,head_width=0.4,tail_width=0.4 +`simple` |head_length=0.5,head_width=0.5,tail_width=0.2 +`wedge` |tail_width=0.3,shrink_factor=0.5 + + +```python +import matplotlib.patches as mpatches +import matplotlib.pyplot as plt + +styles = mpatches.ArrowStyle.get_styles() + +ncol=2 +nrow = (len(styles)+1) // ncol +figheight = (nrow+0.5) +fig1 = plt.figure(1, (4.*ncol/1.5, figheight/1.5)) +fontsize = 0.2 * 70 + + +ax = fig1.add_axes([0, 0, 1, 1], frameon=False, aspect=1.) 
+ +ax.set_xlim(0, 4*ncol) +ax.set_ylim(0, figheight) + +def to_texstring(s): + s = s.replace("<", r"$<$") + s = s.replace(">", r"$>$") + s = s.replace("|", r"$|$") + return s + +for i, (stylename, styleclass) in enumerate(sorted(styles.items())): + x = 3.2 + (i//nrow)*4 + y = (figheight - 0.7 - i%nrow) # /figheight + p = mpatches.Circle((x, y), 0.2, fc="w") + ax.add_patch(p) + + ax.annotate(to_texstring(stylename), (x, y), + (x-1.2, y), + #xycoords="figure fraction", textcoords="figure fraction", + ha="right", va="center", + size=fontsize, + arrowprops=dict(arrowstyle=stylename, + patchB=p, + shrinkA=5, + shrinkB=5, + fc="w", ec="k", + connectionstyle="arc3,rad=-0.05", + ), + bbox=dict(boxstyle="square", fc="w")) + +ax.xaxis.set_visible(False) +ax.yaxis.set_visible(False) + + + +plt.draw() +plt.show() +``` + + +![png](output_14_0.png) + diff --git a/docs/06-matplotlib/06.07-legend.md b/docs/06-matplotlib/06.07-legend.md new file mode 100644 index 00000000..82120b1c --- /dev/null +++ b/docs/06-matplotlib/06.07-legend.md @@ -0,0 +1,262 @@ + +# 标签 + + +```python +import numpy as np +import matplotlib as mpl +import matplotlib.pyplot as plt + +%matplotlib inline +``` + +`legend()` 函数被用来添加图像的标签,其主要相关的属性有: + +- legend entry - 一个 legend 包含一个或多个 entry,一个 entry 对应一个 key 和一个 label +- legend key - marker 的标记 +- legend label - key 的说明 +- legend handle - 一个 entry 在图上对应的对象 + +## 使用 legend + +调用 `legend()` 会自动获取当前的 `Axes` 对象,并且得到这些 handles 和 labels,相当于: + + handles, labels = ax.get_legend_handles_labels() + ax.legend(handles, labels) + +我们可以在函数中指定 `handles` 的参数: + + +```python +line_up, = plt.plot([1,2,3], label='Line 2') +line_down, = plt.plot([3,2,1], label='Line 1') +plt.legend(handles=[line_up, line_down]) +plt.show() +``` + + +![png](output_5_0.png) + + +可以将 labels 作为参数输入 `legend` 函数: + + +```python +line_up, = plt.plot([1,2,3]) +line_down, = plt.plot([3,2,1]) +plt.legend([line_up, line_down], ['Line Up', 'Line Down']) +plt.show() +``` + + +![png](output_7_0.png) + + +## 
产生特殊形状的 marker key + +有时我们可以产生一些特殊形状的 marker: + +块状: + + +```python +import matplotlib.patches as mpatches + +red_patch = mpatches.Patch(color='red', label='The red data') +plt.legend(handles=[red_patch]) + +plt.show() +``` + + +![png](output_10_0.png) + + +点线组合: + + +```python +import matplotlib.lines as mlines +import matplotlib.pyplot as plt + +blue_line = mlines.Line2D([], [], color='blue', marker='*', + markersize=15, label='Blue stars') +plt.legend(handles=[blue_line]) + +plt.show() +``` + + +![png](output_12_0.png) + + +## 指定 legend 的位置 + +`bbox_to_anchor` 关键词可以指定 `legend` 放置的位置,例如放到图像的右上角: + + +```python +plt.plot([1,2,3], label="test1") +plt.plot([3,2,1], label="test2") +plt.legend(bbox_to_anchor=(1, 1), + bbox_transform=plt.gcf().transFigure) + +plt.show() +``` + + +![png](output_15_0.png) + + +更复杂的用法: + + +```python +plt.subplot(211) +plt.plot([1,2,3], label="test1") +plt.plot([3,2,1], label="test2") +# Place a legend above this legend, expanding itself to +# fully use the given bounding box. +plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, + ncol=2, mode="expand", borderaxespad=0.) + +plt.subplot(223) +plt.plot([1,2,3], label="test1") +plt.plot([3,2,1], label="test2") +# Place a legend to the right of this smaller figure. +plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) + +plt.show() +``` + + +![png](output_17_0.png) + + +## 同一个 Axes 中的多个 legend + +可以这样添加多个 `legend`: + + +```python +line1, = plt.plot([1,2,3], label="Line 1", linestyle='--') +line2, = plt.plot([3,2,1], label="Line 2", linewidth=4) + +# Create a legend for the first line. +first_legend = plt.legend(handles=[line1], loc=1) + +# Add the legend manually to the current Axes. +ax = plt.gca().add_artist(first_legend) + +# Create another legend for the second line. 
+plt.legend(handles=[line2], loc=4) + +plt.show() +``` + + +![png](output_20_0.png) + + +其中 `loc` 参数可以取 0-10 或者 字符串,表示放置的位置: + +loc string | loc code +---|--- +`'best' `| 0 +`'upper right' ` | 1 +`'upper left' ` |2 +`'lower left' ` |3 +`'lower right' ` |4 +`'right' ` | 5 +`'center left' ` |6 +`'center right'` | 7 +`'lower center'` | 8 +`'upper center'` | 9 +`'center'` |10 + +## 更多用法 + +多个 `handle` 可以通过括号组合在一个 entry 中: + + +```python +from numpy.random import randn + +z = randn(10) + +red_dot, = plt.plot(z, "ro", markersize=15) +# Put a white cross over some of the data. +white_cross, = plt.plot(z[:5], "w+", markeredgewidth=3, markersize=15) + +plt.legend([red_dot, (red_dot, white_cross)], ["Attr A", "Attr A+B"]) + +plt.show() +``` + + +![png](output_24_0.png) + + +自定义 `handle`: + + +```python +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches + +class AnyObject(object): + pass + +class AnyObjectHandler(object): + def legend_artist(self, legend, orig_handle, fontsize, handlebox): + x0, y0 = handlebox.xdescent, handlebox.ydescent + width, height = handlebox.width, handlebox.height + patch = mpatches.Rectangle([x0, y0], width, height, facecolor='red', + edgecolor='black', hatch='xx', lw=3, + transform=handlebox.get_transform()) + handlebox.add_artist(patch) + return patch + +plt.legend([AnyObject()], ['My first handler'], + handler_map={AnyObject: AnyObjectHandler()}) + +plt.show() +``` + + +![png](output_26_0.png) + + +椭圆: + + +```python +from matplotlib.legend_handler import HandlerPatch +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches + + +class HandlerEllipse(HandlerPatch): + def create_artists(self, legend, orig_handle, + xdescent, ydescent, width, height, fontsize, trans): + center = 0.5 * width - 0.5 * xdescent, 0.5 * height - 0.5 * ydescent + p = mpatches.Ellipse(xy=center, width=width + xdescent, + height=height + ydescent) + self.update_prop(p, orig_handle, legend) + p.set_transform(trans) + return [p] + + +c = 
mpatches.Circle((0.5, 0.5), 0.25, facecolor="green", + edgecolor="red", linewidth=3) +plt.gca().add_patch(c) + +plt.legend([c], ["An ellipse, not a rectangle"], + handler_map={mpatches.Circle: HandlerEllipse()}) + +plt.show() +``` + + +![png](output_28_0.png) + diff --git a/docs/06-matplotlib/06.08-figures,-subplots,-axes-and-ticks.md b/docs/06-matplotlib/06.08-figures,-subplots,-axes-and-ticks.md new file mode 100644 index 00000000..8efa9f36 --- /dev/null +++ b/docs/06-matplotlib/06.08-figures,-subplots,-axes-and-ticks.md @@ -0,0 +1,142 @@ + +# figures, subplots, axes 和 ticks 对象 + +## figures, axes 和 ticks 的关系 + +这些对象的关系可以用下面的图来表示: + +示例图像: + +图1 + +具体结构: + +图2 + +## figure 对象 + +`figure` 对象是最外层的绘图单位,默认是以 `1` 开始编号(**MATLAB** 风格,`Figure 1, Figure 2, ...`),可以用 `plt.figure()` 产生一幅图像,除了默认参数外,可以指定的参数有: + +- `num` - 编号 +- `figsize` - 图像大小 +- `dpi` - 分辨率 +- `facecolor` - 背景色 +- `edgecolor` - 边界颜色 +- `frameon` - 边框 + +这些属性也可以通过 `Figure` 对象的 `set_xxx` 方法来改变。 + +## subplot 和 axes 对象 + +### subplot + +`subplot` 主要是使用网格排列子图: + + +```python +%pylab inline + +subplot(2,1,1) +xticks([]), yticks([]) +text(0.5,0.5, 'subplot(2,1,1)',ha='center',va='center',size=24,alpha=.5) + +subplot(2,1,2) +xticks([]), yticks([]) +text(0.5,0.5, 'subplot(2,1,2)',ha='center',va='center',size=24,alpha=.5) + +show() +``` + + Populating the interactive namespace from numpy and matplotlib + + + +![png](output_9_1.png) + + +更高级的可以用 `gridspec` 来绘图: + + +```python +import matplotlib.gridspec as gridspec + +G = gridspec.GridSpec(3, 3) + +axes_1 = subplot(G[0, :]) +xticks([]), yticks([]) +text(0.5,0.5, 'Axes 1',ha='center',va='center',size=24,alpha=.5) + +axes_2 = subplot(G[1,:-1]) +xticks([]), yticks([]) +text(0.5,0.5, 'Axes 2',ha='center',va='center',size=24,alpha=.5) + +axes_3 = subplot(G[1:, -1]) +xticks([]), yticks([]) +text(0.5,0.5, 'Axes 3',ha='center',va='center',size=24,alpha=.5) + +axes_4 = subplot(G[-1,0]) +xticks([]), yticks([]) +text(0.5,0.5, 'Axes 4',ha='center',va='center',size=24,alpha=.5) + 
+axes_5 = subplot(G[-1,-2]) +xticks([]), yticks([]) +text(0.5,0.5, 'Axes 5',ha='center',va='center',size=24,alpha=.5) + +show() +``` + + +![png](output_11_0.png) + + +## axes 对象 + +`subplot` 返回的是 `Axes` 对象,但是 `Axes` 对象相对于 `subplot` 返回的对象来说要更自由一点。`Axes` 对象可以放置在图像中的任意位置: + + +```python +axes([0.1,0.1,.8,.8]) +xticks([]), yticks([]) +text(0.6,0.6, 'axes([0.1,0.1,.8,.8])',ha='center',va='center',size=20,alpha=.5) + +axes([0.2,0.2,.3,.3]) +xticks([]), yticks([]) +text(0.5,0.5, 'axes([0.2,0.2,.3,.3])',ha='center',va='center',size=16,alpha=.5) + +show() +``` + + +![png](output_14_0.png) + + + +```python +axes([0.1,0.1,.5,.5]) +xticks([]), yticks([]) +text(0.1,0.1, 'axes([0.1,0.1,.8,.8])',ha='left',va='center',size=16,alpha=.5) + +axes([0.2,0.2,.5,.5]) +xticks([]), yticks([]) +text(0.1,0.1, 'axes([0.2,0.2,.5,.5])',ha='left',va='center',size=16,alpha=.5) + +axes([0.3,0.3,.5,.5]) +xticks([]), yticks([]) +text(0.1,0.1, 'axes([0.3,0.3,.5,.5])',ha='left',va='center',size=16,alpha=.5) + +axes([0.4,0.4,.5,.5]) +xticks([]), yticks([]) +text(0.1,0.1, 'axes([0.4,0.4,.5,.5])',ha='left',va='center',size=16,alpha=.5) + +show() +``` + + +![png](output_15_0.png) + + +后面的 `Axes` 对象会覆盖前面的内容。 + +## ticks 对象 + +ticks 用来注释轴的内容,我们可以通过控制它的属性来决定在哪里显示轴、轴的内容是什么等等。 diff --git a/docs/06-matplotlib/06.09-do-not-trust-the-defaults.md b/docs/06-matplotlib/06.09-do-not-trust-the-defaults.md new file mode 100644 index 00000000..dd991010 --- /dev/null +++ b/docs/06-matplotlib/06.09-do-not-trust-the-defaults.md @@ -0,0 +1,518 @@ + +# 不要迷信默认设置 + +导入相关的包: + + +```python +import numpy as np +import matplotlib.pyplot as plt +``` + +生成三角函数: + + +```python +x = np.linspace(-np.pi, np.pi) +c, s = np.cos(x), np.sin(x) +``` + +## 默认绘图 + + +```python +%matplotlib inline + +# 画图 +p = plt.plot(x,c) +p = plt.plot(x,s) + +# 在脚本中需要加上这句才会显示图像 +plt.show() +``` + + +![png](output_6_0.png) + + +默认效果如图所示,我们可以修改默认的属性来得到更漂亮的结果。 + +# 图 + +图像以 `Figure #` 为窗口标题,并且数字从 1 开始,`figure()` 函数的主要参数如下: + +参数 | 默认值 | 描述 +---|---|--- 
+`num`|`1`| 图号 +`figsize`|`figure.figsize`| 图大小(宽,高)(单位英寸) +`dpi`|`figure.dpi`| 分辨率(每英寸所打印的点数) +`facecolor`|`figure.facecolor`| 背景颜色 +`edgecolor`|`figure.edgecolor`| 边界颜色 +`frameon` |`True`| 是否显示图框架 + + +```python +# 设置图像大小 +f = plt.figure(figsize=(10,6), dpi=80) + +# 画图 +p = plt.plot(x,c) +p = plt.plot(x,s) + +# 在脚本中需要加上这句才会显示图像 +plt.show() +``` + +### 设置线条颜色,粗细,类型 + +首先,我们使用 figure() 函数来创建一幅新图像,并且指定它的大小,使得长宽比更合适。 + +然后,我们使用 `color, linewidth, linestyle` 参数,指定曲线的颜色,粗细,类型: + + +```python +# 设置图像大小 +f = plt.figure(figsize=(10,6), dpi=80) + +# 画图,指定颜色,线宽,类型 +p = plt.plot(x, c, color="blue", linewidth=2.5, linestyle="-") +p = plt.plot(x, s, color="red", linewidth=2.5, linestyle="-") + +# 在脚本中需要加上这句才会显示图像 +# plt.show() +``` + + +![png](output_13_0.png) + + +也可以像 **Matlab** 中一样使用格式字符来修改参数: + +表示颜色的字符参数有: + +字符 | 颜色 +-- | -- +`‘b’`| 蓝色,blue +`‘g’`| 绿色,green +`‘r’`| 红色,red +`‘c’`| 青色,cyan +`‘m’`| 品红,magenta +`‘y’`| 黄色,yellow +`‘k’`| 黑色,black +`‘w’`| 白色,white + +表示类型的字符参数有: + +字符|类型 | 字符|类型 +---|--- | --- | --- +` '-' `| 实线 | `'--'`| 虚线 +`'-.'`| 虚点线 | `':'`| 点线 +`'.'`| 点 | `','`| 像素点 +`'o'` |圆点 | `'v'`| 下三角点 +`'^'`| 上三角点 | `'<'`| 左三角点 +`'>'`| 右三角点 | `'1'`| 下三叉点 +`'2'`| 上三叉点 | `'3'`| 左三叉点 +`'4'`| 右三叉点 | `'s'`| 正方点 +`'p'` | 五角点 | `'*'`| 星形点 +`'h'`| 六边形点1 | `'H'`| 六边形点2 +`'+'`| 加号点 | `'x'`| 乘号点 +`'D'`| 实心菱形点 | `'d'`| 瘦菱形点 +`'_'`| 横线点 | | + + +```python +# 设置图像大小 +f = plt.figure(figsize=(10,6), dpi=80) + +# 画图,指定颜色,线宽,类型 +p = plt.plot(x, c, 'b-', + x, s, 'r-', linewidth=2.5) + +# 在脚本中需要加上这句才会显示图像 +# plt.show() +``` + + +![png](output_15_0.png) + + +### 设置横轴纵轴的显示区域 + +我们希望将坐标轴的显示区域放大一些,这样可以看到所有的点,可以使用 `plt` 中的 `xlim` 和 `ylim` 来设置: + + +```python +# 设置图像大小 +p = plt.figure(figsize=(10,6), dpi=80) + +# 画图,指定颜色,线宽,类型 +p = plt.plot(x, c, 'b-', + x, s, 'r-', linewidth=2.5) + +######################################################################## + +# 设置显示范围 +p = plt.xlim(x.min() * 1.1, x.max() * 1.1) +p = plt.ylim(c.min() * 1.1, c.max() * 1.1) + 
+######################################################################## + +# 在脚本中需要加上这句才会显示图像 +# plt.show() +``` + + +![png](output_18_0.png) + + +### 设置刻度 + +对于三角函数来说,我们希望将 `x` 轴的刻度设为与 $\pi$ 有关的点,可以使用 `plt` 中的 `xticks` 和 `yticks` 函数,将需要的刻度传入: + + +```python +# 设置图像大小 +f = plt.figure(figsize=(10,6), dpi=80) + +# 画图,指定颜色,线宽,类型 +p = plt.plot(x, c, 'b-', + x, s, 'r-', linewidth=2.5) + +# 设置显示范围 +plt.xlim(x.min() * 1.1, x.max() * 1.1) +plt.ylim(c.min() * 1.1, c.max() * 1.1) + +########################################################################### + +# 设置刻度 +p = plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi]) +p = plt.yticks([-1, 0, 1]) + +########################################################################### + +# 在脚本中需要加上这句才会显示图像 +# plt.show() +``` + + +![png](output_21_0.png) + + +### 设定 x 轴 y 轴标题 + +我们想让刻度的位置显示的是含有 $\pi$ 的标识而不是浮点数,可以在 `xticks` 中传入第二组参数,这组参数代表对应刻度的显示标识。这里,我们使用 `latex` 的语法来显示特殊符号(使用 `$$` 包围的部分): + + +```python +# 设置图像大小 +f = plt.figure(figsize=(10,6), dpi=80) + +# 画图,指定颜色,线宽,类型 +p = plt.plot(x, c, 'b-', + x, s, 'r-', linewidth=2.5) + +# 设置显示范围 +plt.xlim(x.min() * 1.1, x.max() * 1.1) +plt.ylim(c.min() * 1.1, c.max() * 1.1) + +# 设置刻度及其标识 +p = plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi], + ['$-\pi$', '$-\pi/2$', '$0$', '$\pi/2$', '$\pi$'], fontsize ='xx-large') +p = plt.yticks([-1, 0, 1], + ['$-1$', '$0$', '$+1$'], fontsize ='xx-large') + +# 在脚本中需要加上这句才会显示图像 +# plt.show() +``` + + +![png](output_24_0.png) + + +### 移动坐标轴的位置 + +现在坐标轴的位置是在边界上,而且有上下左右四条,我们现在想将下面和左边的两条移动到中间,并将右边和上面的两条去掉: + + +```python +# 设置图像大小 +f = plt.figure(figsize=(10,6), dpi=80) + +# 画图,指定颜色,线宽,类型 +plt.plot(x, c, 'b-', + x, s, 'r-', linewidth=2.5) + +# 设置显示范围 +plt.xlim(x.min() * 1.1, x.max() * 1.1) +plt.ylim(c.min() * 1.1, c.max() * 1.1) + +# 得到轴的句柄 +ax = plt.gca() +# ax.spines参数表示四个坐标轴线 +# 将右边和上边的颜色设为透明 +ax.spines['right'].set_color('none') +ax.spines['top'].set_color('none') + +################################################################################### + +# 将 
x 轴的刻度设置在下面的坐标轴上 +ax.xaxis.set_ticks_position('bottom') +# 设置位置 +ax.spines['bottom'].set_position(('data',0)) + +# 将 y 轴的刻度设置在左边的坐标轴上 +ax.yaxis.set_ticks_position('left') +# 设置位置 +ax.spines['left'].set_position(('data',0)) + +################################################################################### + +# 设置刻度及其标识 +p = plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi], + ['$-\pi$', '$-\pi/2$', '$0$', '$\pi/2$', '$\pi$'], fontsize ='xx-large') +p = plt.yticks([-1, 0, 1], + ['$-1$', '$0$', '$+1$'], fontsize ='xx-large') + +# 在脚本中需要加上这句才会显示图像 +# plt.show() +``` + + +![png](output_27_0.png) + + +### 加入图例 + +使用 legend() 加入图例: + + +```python +# 设置图像大小 +plt.figure(figsize=(10,6), dpi=80) + +# 画图,指定颜色,线宽,类型 +plt.plot(x, c, 'b-', + x, s, 'r-', linewidth=2.5) + +# 设置显示范围 +plt.xlim(x.min() * 1.1, x.max() * 1.1) +plt.ylim(c.min() * 1.1, c.max() * 1.1) + +# 得到画图的句柄 +ax = plt.gca() + +# ax.spines参数表示四个坐标轴线 +# 将右边和上边的颜色设为透明 +ax.spines['right'].set_color('none') +ax.spines['top'].set_color('none') + +# 将 x 轴的刻度设置在下面的坐标轴上 +ax.xaxis.set_ticks_position('bottom') +# 设置位置 +ax.spines['bottom'].set_position(('data',0)) + +# 将 y 轴的刻度设置在左边的坐标轴上 +ax.yaxis.set_ticks_position('left') +# 设置位置 +ax.spines['left'].set_position(('data',0)) + +# 设置刻度及其标识 +plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi], + ['$-\pi$', '$-\pi/2$', '$0$', '$\pi/2$', '$\pi$'], fontsize ='xx-large') +plt.yticks([-1, 0, 1], + ['$-1$', '$0$', '$+1$'], fontsize ='xx-large') + +################################################################################################## + +# 加入图例,frameon表示去掉图例周围的边框 +l = plt.legend(['cosine', 'sine'], loc='upper left', frameon=False) + +################################################################################################## + +# 在脚本中需要加上这句才会显示图像 +# plt.show() +``` + + +![png](output_30_0.png) + + +### 注释特殊点 + +我们可以使用 `annotate` 函数来注释特殊的点,假设我们要显示的点是 $2\pi/3$: + + +```python +# 设置图像大小 +plt.figure(figsize=(10,6), dpi=80) + +# 画图,指定颜色,线宽,类型 +plt.plot(x, c, 'b-', + x, s, 
'r-', linewidth=2.5) + +# 设置显示范围 +plt.xlim(x.min() * 1.1, x.max() * 1.1) +plt.ylim(c.min() * 1.1, c.max() * 1.1) + +# 得到画图的句柄 +ax = plt.gca() + +# ax.spines参数表示四个坐标轴线 +# 将右边和上边的颜色设为透明 +ax.spines['right'].set_color('none') +ax.spines['top'].set_color('none') + +# 将 x 轴的刻度设置在下面的坐标轴上 +ax.xaxis.set_ticks_position('bottom') +# 设置位置 +ax.spines['bottom'].set_position(('data',0)) + +# 将 y 轴的刻度设置在左边的坐标轴上 +ax.yaxis.set_ticks_position('left') +# 设置位置 +ax.spines['left'].set_position(('data',0)) + +# 设置刻度及其标识 +plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi], + ['$-\pi$', '$-\pi/2$', '$0$', '$\pi/2$', '$\pi$'], fontsize ='xx-large') +plt.yticks([-1, 0, 1], + ['$-1$', '$0$', '$+1$'], fontsize ='xx-large') + +# 加入图例,frameon表示图例周围是否需要边框 +l = plt.legend(['cosine', 'sine'], loc='upper left', frameon=False) + +#################################################################################### + +# 数据点 +t = 2 * np.pi / 3 + +# 蓝色虚线 +plt.plot([t,t],[0,np.cos(t)], color ='blue', linewidth=2.5, linestyle="--") + +# 该点处的 cos 值 +plt.scatter([t,],[np.cos(t),], 50, color ='blue') + +# 在对应的点显示文本 +plt.annotate(r'$\sin(\frac{2\pi}{3})=\frac{\sqrt{3}}{2}$', # 文本 + xy=(t, np.sin(t)), # 数据点坐标位置 + xycoords='data', # 坐标相对于数据 + xytext=(+10, +30), # 文本位置坐标 + textcoords='offset points', # 坐标相对于数据点的坐标 + fontsize=16, # 文本大小 + arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2")) # 箭头 + +# 红色虚线 +p = plt.plot([t,t],[0,np.sin(t)], color ='red', linewidth=2.5, linestyle="--") + +# 该点处的 sin 值 +p = plt.scatter([t,],[np.sin(t),], 50, color ='red') + +# 显示文本 +p = plt.annotate(r'$\cos(\frac{2\pi}{3})=-\frac{1}{2}$', + xy=(t, np.cos(t)), xycoords='data', + xytext=(-90, -50), textcoords='offset points', fontsize=16, + arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2")) + + +##################################################################################### + +# 在脚本中需要加上这句才会显示图像 +# plt.show() +``` + + +![png](output_33_0.png) + + +### 最后调整 + +调整刻度值的大小,并让其显示在曲线上方。 + + +```python +# 设置图像大小 
+plt.figure(figsize=(10,6), dpi=80) + +# 画图,指定颜色,线宽,类型 +plt.plot(x, c, 'b-', + x, s, 'r-', linewidth=2.5) + +# 设置显示范围 +plt.xlim(x.min() * 1.1, x.max() * 1.1) +plt.ylim(c.min() * 1.1, c.max() * 1.1) + +# 得到画图的句柄 +ax = plt.gca() + +# ax.spines参数表示四个坐标轴线 +# 将右边和上边的颜色设为透明 +ax.spines['right'].set_color('none') +ax.spines['top'].set_color('none') + +# 将 x 轴的刻度设置在下面的坐标轴上 +ax.xaxis.set_ticks_position('bottom') +# 设置位置 +ax.spines['bottom'].set_position(('data',0)) + +# 将 y 轴的刻度设置在左边的坐标轴上 +ax.yaxis.set_ticks_position('left') +# 设置位置 +ax.spines['left'].set_position(('data',0)) + +# 设置刻度及其标识 +plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi], + ['$-\pi$', '$-\pi/2$', '$0$', '$\pi/2$', '$\pi$'], fontsize ='xx-large') +plt.yticks([-1, 0, 1], + ['$-1$', '$0$', '$+1$'], fontsize ='xx-large') + +# 加入图例,frameon表示图例周围是否需要边框 +l = plt.legend(['cosine', 'sine'], loc='upper left', frameon=False) + +# 数据点 +t = 2 * np.pi / 3 + +# 蓝色虚线 +plt.plot([t,t],[0,np.cos(t)], color ='blue', linewidth=2.5, linestyle="--") + +# 该点处的 cos 值 +plt.scatter([t,],[np.cos(t),], 50, color ='blue') + +# 在对应的点显示文本 +plt.annotate(r'$\sin(\frac{2\pi}{3})=\frac{\sqrt{3}}{2}$', # 文本 + xy=(t, np.sin(t)), # 数据点坐标位置 + xycoords='data', # 坐标相对于数据 + xytext=(+10, +30), # 文本位置坐标 + textcoords='offset points', # 坐标相对于数据点的坐标 + fontsize=16, # 文本大小 + arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2")) # 箭头 + +# 红色虚线 +p = plt.plot([t,t],[0,np.sin(t)], color ='red', linewidth=2.5, linestyle="--") + +# 该点处的 sin 值 +p = plt.scatter([t,],[np.sin(t),], 50, color ='red') + +# 显示文本 +p = plt.annotate(r'$\cos(\frac{2\pi}{3})=-\frac{1}{2}$', + xy=(t, np.cos(t)), xycoords='data', + xytext=(-90, -50), textcoords='offset points', fontsize=16, + arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2")) + + +##################################################################################### + +for label in ax.get_xticklabels() + ax.get_yticklabels(): + label.set_fontsize(16) + label.set_bbox(dict(facecolor='white', 
edgecolor='None', alpha=0.65 )) + +#################################################################################### + +# 在脚本中需要加上这句才会显示图像 +# plt.show() +``` + + +![png](output_36_0.png) + + +> The devil is in the details. diff --git a/docs/06-matplotlib/06.10-different-plots.md b/docs/06-matplotlib/06.10-different-plots.md new file mode 100644 index 00000000..12c5de82 --- /dev/null +++ b/docs/06-matplotlib/06.10-different-plots.md @@ -0,0 +1,1101 @@ + +# 各种绘图实例 + +## 简单绘图 + +`plot` 函数: + + +```python +%matplotlib inline + +import numpy as np +import matplotlib.pyplot as plt + +t = np.arange(0.0, 2.0, 0.01) +s = np.sin(2*np.pi*t) +plt.plot(t, s) + +plt.xlabel('time (s)') +plt.ylabel('voltage (mV)') +plt.title('About as simple as it gets, folks') +plt.grid(True) +plt.show() +``` + + +![png](output_3_0.png) + + +## 子图 + +`subplot` 函数: + + +```python +import numpy as np +import matplotlib.mlab as mlab + +x1 = np.linspace(0.0, 5.0) +x2 = np.linspace(0.0, 2.0) + +y1 = np.cos(2 * np.pi * x1) * np.exp(-x1) +y2 = np.cos(2 * np.pi * x2) + +plt.subplot(2, 1, 1) +plt.plot(x1, y1, 'yo-') +plt.title('A tale of 2 subplots') +plt.ylabel('Damped oscillation') + +plt.subplot(2, 1, 2) +plt.plot(x2, y2, 'r.-') +plt.xlabel('time (s)') +plt.ylabel('Undamped') + +plt.show() +``` + + +![png](output_6_0.png) + + +## 直方图 + +`hist` 函数: + + +```python +import numpy as np +import matplotlib.mlab as mlab +import matplotlib.pyplot as plt + +# example data +mu = 100 # mean of distribution +sigma = 15 # standard deviation of distribution +x = mu + sigma * np.random.randn(10000) + +num_bins = 50 +# the histogram of the data +n, bins, patches = plt.hist(x, num_bins, normed=1, facecolor='green', alpha=0.5) +# add a 'best fit' line +y = mlab.normpdf(bins, mu, sigma) +plt.plot(bins, y, 'r--') +plt.xlabel('Smarts') +plt.ylabel('Probability') +plt.title(r'Histogram of IQ: $\mu=100$, $\sigma=15$') + +# Tweak spacing to prevent clipping of ylabel +plt.subplots_adjust(left=0.15) +plt.show() +``` + + 
+![png](output_9_0.png) + + +## 路径图 + +`matplotlib.path` 包: + + +```python +import matplotlib.path as mpath +import matplotlib.patches as mpatches +import matplotlib.pyplot as plt + +fig, ax = plt.subplots() + +Path = mpath.Path +path_data = [ + (Path.MOVETO, (1.58, -2.57)), + (Path.CURVE4, (0.35, -1.1)), + (Path.CURVE4, (-1.75, 2.0)), + (Path.CURVE4, (0.375, 2.0)), + (Path.LINETO, (0.85, 1.15)), + (Path.CURVE4, (2.2, 3.2)), + (Path.CURVE4, (3, 0.05)), + (Path.CURVE4, (2.0, -0.5)), + (Path.CLOSEPOLY, (1.58, -2.57)), + ] +codes, verts = zip(*path_data) +path = mpath.Path(verts, codes) +patch = mpatches.PathPatch(path, facecolor='r', alpha=0.5) +ax.add_patch(patch) + +# plot control points and connecting lines +x, y = zip(*path.vertices) +line, = ax.plot(x, y, 'go-') + +ax.grid() +ax.axis('equal') +plt.show() +``` + + +![png](output_12_0.png) + + +## 三维绘图 + +导入 `Axes3D`: + + +```python +from mpl_toolkits.mplot3d import Axes3D +from matplotlib import cm +from matplotlib.ticker import LinearLocator, FormatStrFormatter +import matplotlib.pyplot as plt +import numpy as np + +fig = plt.figure() +ax = fig.gca(projection='3d') +X = np.arange(-5, 5, 0.25) +Y = np.arange(-5, 5, 0.25) +X, Y = np.meshgrid(X, Y) +R = np.sqrt(X**2 + Y**2) +Z = np.sin(R) +surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, + linewidth=0, antialiased=False) +ax.set_zlim(-1.01, 1.01) + +ax.zaxis.set_major_locator(LinearLocator(10)) +ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f')) + +fig.colorbar(surf, shrink=0.5, aspect=5) + +plt.show() +``` + + +![png](output_15_0.png) + + +## 流向图 + +主要函数:`plt.streamplot` + + +```python +import numpy as np +import matplotlib.pyplot as plt + +Y, X = np.mgrid[-3:3:100j, -3:3:100j] +U = -1 - X**2 + Y +V = 1 + X - Y**2 +speed = np.sqrt(U*U + V*V) + +plt.streamplot(X, Y, U, V, color=U, linewidth=2, cmap=plt.cm.autumn) +plt.colorbar() + +f, (ax1, ax2) = plt.subplots(ncols=2) +ax1.streamplot(X, Y, U, V, density=[0.5, 1]) + +lw = 
5*speed/speed.max() +ax2.streamplot(X, Y, U, V, density=0.6, color='k', linewidth=lw) + +plt.show() +``` + + +![png](output_18_0.png) + + + +![png](output_18_1.png) + + +## 椭圆 + +`Ellipse` 对象: + + +```python +from pylab import figure, show, rand +from matplotlib.patches import Ellipse + +NUM = 250 + +ells = [Ellipse(xy=rand(2)*10, width=rand(), height=rand(), angle=rand()*360) + for i in range(NUM)] + +fig = figure() +ax = fig.add_subplot(111, aspect='equal') +for e in ells: + ax.add_artist(e) + e.set_clip_box(ax.bbox) + e.set_alpha(rand()) + e.set_facecolor(rand(3)) + +ax.set_xlim(0, 10) +ax.set_ylim(0, 10) + +show() +``` + + +![png](output_21_0.png) + + +## 条状图 + +`bar` 函数: + + +```python +import numpy as np +import matplotlib.pyplot as plt + + +n_groups = 5 + +means_men = (20, 35, 30, 35, 27) +std_men = (2, 3, 4, 1, 2) + +means_women = (25, 32, 34, 20, 25) +std_women = (3, 5, 2, 3, 3) + +fig, ax = plt.subplots() + +index = np.arange(n_groups) +bar_width = 0.35 + +opacity = 0.4 +error_config = {'ecolor': '0.3'} + +rects1 = plt.bar(index, means_men, bar_width, + alpha=opacity, + color='b', + yerr=std_men, + error_kw=error_config, + label='Men') + +rects2 = plt.bar(index + bar_width, means_women, bar_width, + alpha=opacity, + color='r', + yerr=std_women, + error_kw=error_config, + label='Women') + +plt.xlabel('Group') +plt.ylabel('Scores') +plt.title('Scores by group and gender') +plt.xticks(index + bar_width, ('A', 'B', 'C', 'D', 'E')) +plt.legend() + +plt.tight_layout() +plt.show() +``` + + +![png](output_24_0.png) + + +## 饼状图 + +`pie` 函数: + + +```python +import matplotlib.pyplot as plt + + +# The slices will be ordered and plotted counter-clockwise. +labels = 'Frogs', 'Hogs', 'Dogs', 'Logs' +sizes = [15, 30, 45, 10] +colors = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral'] +explode = (0, 0.1, 0, 0) # only "explode" the 2nd slice (i.e. 
'Hogs') + +plt.pie(sizes, explode=explode, labels=labels, colors=colors, + autopct='%1.1f%%', shadow=True, startangle=90) +# Set aspect ratio to be equal so that pie is drawn as a circle. +plt.axis('equal') + +plt.show() +``` + + +![png](output_27_0.png) + + +## 图像中的表格 + +`table` 函数: + + +```python +import numpy as np +import matplotlib.pyplot as plt + + +data = [[ 66386, 174296, 75131, 577908, 32015], + [ 58230, 381139, 78045, 99308, 160454], + [ 89135, 80552, 152558, 497981, 603535], + [ 78415, 81858, 150656, 193263, 69638], + [ 139361, 331509, 343164, 781380, 52269]] + +columns = ('Freeze', 'Wind', 'Flood', 'Quake', 'Hail') +rows = ['%d year' % x for x in (100, 50, 20, 10, 5)] + +values = np.arange(0, 2500, 500) +value_increment = 1000 + +# Get some pastel shades for the colors +colors = plt.cm.BuPu(np.linspace(0, 0.5, len(columns))) +n_rows = len(data) + +index = np.arange(len(columns)) + 0.3 +bar_width = 0.4 + +# Initialize the vertical-offset for the stacked bar chart. +y_offset = np.array([0.0] * len(columns)) + +# Plot bars and create text labels for the table +cell_text = [] +for row in range(n_rows): + plt.bar(index, data[row], bar_width, bottom=y_offset, color=colors[row]) + y_offset = y_offset + data[row] + cell_text.append(['%1.1f' % (x/1000.0) for x in y_offset]) +# Reverse colors and text labels to display the last value at the top. 
+colors = colors[::-1] +cell_text.reverse() + +# Add a table at the bottom of the axes +the_table = plt.table(cellText=cell_text, + rowLabels=rows, + rowColours=colors, + colLabels=columns, + loc='bottom') + +# Adjust layout to make room for the table: +plt.subplots_adjust(left=0.2, bottom=0.2) + +plt.ylabel("Loss in ${0}'s".format(value_increment)) +plt.yticks(values * value_increment, ['%d' % val for val in values]) +plt.xticks([]) +plt.title('Loss by Disaster') + +plt.show() +``` + + +![png](output_30_0.png) + + +## 散点图 + +`scatter` 函数: + + +```python +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.cbook as cbook + +# Load a numpy record array from yahoo csv data with fields date, +# open, close, volume, adj_close from the mpl-data/example directory. +# The record array stores python datetime.date as an object array in +# the date column +datafile = cbook.get_sample_data('goog.npy') +price_data = np.load(datafile).view(np.recarray) +price_data = price_data[-250:] # get the most recent 250 trading days + +delta1 = np.diff(price_data.adj_close)/price_data.adj_close[:-1] + +# Marker size in units of points^2 +volume = (15 * price_data.volume[:-2] / price_data.volume[0])**2 +close = 0.003 * price_data.close[:-2] / 0.003 * price_data.open[:-2] + +fig, ax = plt.subplots() +ax.scatter(delta1[:-1], delta1[1:], c=close, s=volume, alpha=0.5) + +ax.set_xlabel(r'$\Delta_i$', fontsize=20) +ax.set_ylabel(r'$\Delta_{i+1}$', fontsize=20) +ax.set_title('Volume and percent change') + +ax.grid(True) +fig.tight_layout() + +plt.show() +``` + + +![png](output_33_0.png) + + +## 设置按钮 + +`matplotlib.widgets` 模块: + + +```python +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.widgets import Slider, Button, RadioButtons + +fig, ax = plt.subplots() +plt.subplots_adjust(left=0.25, bottom=0.25) +t = np.arange(0.0, 1.0, 0.001) +a0 = 5 +f0 = 3 +s = a0*np.sin(2*np.pi*f0*t) +l, = plt.plot(t,s, lw=2, color='red') +plt.axis([0, 1, -10, 10]) + +axcolor = 
'lightgoldenrodyellow' +axfreq = plt.axes([0.25, 0.1, 0.65, 0.03], axisbg=axcolor) +axamp = plt.axes([0.25, 0.15, 0.65, 0.03], axisbg=axcolor) + +sfreq = Slider(axfreq, 'Freq', 0.1, 30.0, valinit=f0) +samp = Slider(axamp, 'Amp', 0.1, 10.0, valinit=a0) + +def update(val): + amp = samp.val + freq = sfreq.val + l.set_ydata(amp*np.sin(2*np.pi*freq*t)) + fig.canvas.draw_idle() +sfreq.on_changed(update) +samp.on_changed(update) + +resetax = plt.axes([0.8, 0.025, 0.1, 0.04]) +button = Button(resetax, 'Reset', color=axcolor, hovercolor='0.975') +def reset(event): + sfreq.reset() + samp.reset() +button.on_clicked(reset) + +rax = plt.axes([0.025, 0.5, 0.15, 0.15], axisbg=axcolor) +radio = RadioButtons(rax, ('red', 'blue', 'green'), active=0) +def colorfunc(label): + l.set_color(label) + fig.canvas.draw_idle() +radio.on_clicked(colorfunc) + +plt.show() +``` + + +![png](output_36_0.png) + + +## 填充曲线 + +`fill` 函数: + + +```python +import numpy as np +import matplotlib.pyplot as plt + + +x = np.linspace(0, 1) +y = np.sin(4 * np.pi * x) * np.exp(-5 * x) + +plt.fill(x, y, 'r') +plt.grid(True) +plt.show() +``` + + +![png](output_39_0.png) + + +## 时间刻度 + + +```python +""" +Show how to make date plots in matplotlib using date tick locators and +formatters. See major_minor_demo1.py for more information on +controlling major and minor ticks + +All matplotlib date plotting is done by converting date instances into +days since the 0001-01-01 UTC. The conversion, tick locating and +formatting is done behind the scenes so this is most transparent to +you. 
The dates module provides several converter functions date2num +and num2date + +""" +import datetime +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.dates as mdates +import matplotlib.cbook as cbook + +years = mdates.YearLocator() # every year +months = mdates.MonthLocator() # every month +yearsFmt = mdates.DateFormatter('%Y') + +# load a numpy record array from yahoo csv data with fields date, +# open, close, volume, adj_close from the mpl-data/example directory. +# The record array stores python datetime.date as an object array in +# the date column +datafile = cbook.get_sample_data('goog.npy') +r = np.load(datafile).view(np.recarray) + +fig, ax = plt.subplots() +ax.plot(r.date, r.adj_close) + + +# format the ticks +ax.xaxis.set_major_locator(years) +ax.xaxis.set_major_formatter(yearsFmt) +ax.xaxis.set_minor_locator(months) + +datemin = datetime.date(r.date.min().year, 1, 1) +datemax = datetime.date(r.date.max().year+1, 1, 1) +ax.set_xlim(datemin, datemax) + +# format the coords message box +def price(x): return '$%1.2f'%x +ax.format_xdata = mdates.DateFormatter('%Y-%m-%d') +ax.format_ydata = price +ax.grid(True) + +# rotates and right aligns the x labels, and moves the bottom of the +# axes up to make room for them +fig.autofmt_xdate() + +plt.show() +``` + + +![png](output_41_0.png) + + +## 金融数据 + + +```python +import datetime +import numpy as np +import matplotlib.colors as colors +import matplotlib.finance as finance +import matplotlib.dates as mdates +import matplotlib.ticker as mticker +import matplotlib.mlab as mlab +import matplotlib.pyplot as plt +import matplotlib.font_manager as font_manager + + +startdate = datetime.date(2006,1,1) +today = enddate = datetime.date.today() +ticker = 'SPY' + + +fh = finance.fetch_historical_yahoo(ticker, startdate, enddate) +# a numpy record array with fields: date, open, high, low, close, volume, adj_close) + +r = mlab.csv2rec(fh); fh.close() +r.sort() + + +def moving_average(x, n, type='simple'): 
+ """ + compute an n period moving average. + + type is 'simple' | 'exponential' + + """ + x = np.asarray(x) + if type=='simple': + weights = np.ones(n) + else: + weights = np.exp(np.linspace(-1., 0., n)) + + weights /= weights.sum() + + + a = np.convolve(x, weights, mode='full')[:len(x)] + a[:n] = a[n] + return a + +def relative_strength(prices, n=14): + """ + compute the n period relative strength indicator + http://stockcharts.com/school/doku.php?id=chart_school:glossary_r#relativestrengthindex + http://www.investopedia.com/terms/r/rsi.asp + """ + + deltas = np.diff(prices) + seed = deltas[:n+1] + up = seed[seed>=0].sum()/n + down = -seed[seed<0].sum()/n + rs = up/down + rsi = np.zeros_like(prices) + rsi[:n] = 100. - 100./(1.+rs) + + for i in range(n, len(prices)): + delta = deltas[i-1] # cause the diff is 1 shorter + + if delta>0: + upval = delta + downval = 0. + else: + upval = 0. + downval = -delta + + up = (up*(n-1) + upval)/n + down = (down*(n-1) + downval)/n + + rs = up/down + rsi[i] = 100. 
- 100./(1.+rs) + + return rsi + +def moving_average_convergence(x, nslow=26, nfast=12): + """ + compute the MACD (Moving Average Convergence/Divergence) using a fast and slow exponential moving avg' + return value is emaslow, emafast, macd which are len(x) arrays + """ + emaslow = moving_average(x, nslow, type='exponential') + emafast = moving_average(x, nfast, type='exponential') + return emaslow, emafast, emafast - emaslow + + +plt.rc('axes', grid=True) +plt.rc('grid', color='0.75', linestyle='-', linewidth=0.5) + +textsize = 9 +left, width = 0.1, 0.8 +rect1 = [left, 0.7, width, 0.2] +rect2 = [left, 0.3, width, 0.4] +rect3 = [left, 0.1, width, 0.2] + + +fig = plt.figure(facecolor='white') +axescolor = '#f6f6f6' # the axes background color + +ax1 = fig.add_axes(rect1, axisbg=axescolor) #left, bottom, width, height +ax2 = fig.add_axes(rect2, axisbg=axescolor, sharex=ax1) +ax2t = ax2.twinx() +ax3 = fig.add_axes(rect3, axisbg=axescolor, sharex=ax1) + + + +### plot the relative strength indicator +prices = r.adj_close +rsi = relative_strength(prices) +fillcolor = 'darkgoldenrod' + +ax1.plot(r.date, rsi, color=fillcolor) +ax1.axhline(70, color=fillcolor) +ax1.axhline(30, color=fillcolor) +ax1.fill_between(r.date, rsi, 70, where=(rsi>=70), facecolor=fillcolor, edgecolor=fillcolor) +ax1.fill_between(r.date, rsi, 30, where=(rsi<=30), facecolor=fillcolor, edgecolor=fillcolor) +ax1.text(0.6, 0.9, '>70 = overbought', va='top', transform=ax1.transAxes, fontsize=textsize) +ax1.text(0.6, 0.1, '<30 = oversold', transform=ax1.transAxes, fontsize=textsize) +ax1.set_ylim(0, 100) +ax1.set_yticks([30,70]) +ax1.text(0.025, 0.95, 'RSI (14)', va='top', transform=ax1.transAxes, fontsize=textsize) +ax1.set_title('%s daily'%ticker) + +### plot the price and volume data +dx = r.adj_close - r.close +low = r.low + dx +high = r.high + dx + +deltas = np.zeros_like(prices) +deltas[1:] = np.diff(prices) +up = deltas>0 +ax2.vlines(r.date[up], low[up], high[up], color='black', label='_nolegend_') 
+ax2.vlines(r.date[~up], low[~up], high[~up], color='black', label='_nolegend_') +ma20 = moving_average(prices, 20, type='simple') +ma200 = moving_average(prices, 200, type='simple') + +linema20, = ax2.plot(r.date, ma20, color='blue', lw=2, label='MA (20)') +linema200, = ax2.plot(r.date, ma200, color='red', lw=2, label='MA (200)') + + +last = r[-1] +s = '%s O:%1.2f H:%1.2f L:%1.2f C:%1.2f, V:%1.1fM Chg:%+1.2f' % ( + today.strftime('%d-%b-%Y'), + last.open, last.high, + last.low, last.close, + last.volume*1e-6, + last.close-last.open ) +t4 = ax2.text(0.3, 0.9, s, transform=ax2.transAxes, fontsize=textsize) + +props = font_manager.FontProperties(size=10) +leg = ax2.legend(loc='center left', shadow=True, fancybox=True, prop=props) +leg.get_frame().set_alpha(0.5) + + +volume = (r.close*r.volume)/1e6 # dollar volume in millions +vmax = volume.max() +poly = ax2t.fill_between(r.date, volume, 0, label='Volume', facecolor=fillcolor, edgecolor=fillcolor) +ax2t.set_ylim(0, 5*vmax) +ax2t.set_yticks([]) + + +### compute the MACD indicator +fillcolor = 'darkslategrey' +nslow = 26 +nfast = 12 +nema = 9 +emaslow, emafast, macd = moving_average_convergence(prices, nslow=nslow, nfast=nfast) +ema9 = moving_average(macd, nema, type='exponential') +ax3.plot(r.date, macd, color='black', lw=2) +ax3.plot(r.date, ema9, color='blue', lw=1) +ax3.fill_between(r.date, macd-ema9, 0, alpha=0.5, facecolor=fillcolor, edgecolor=fillcolor) + + +ax3.text(0.025, 0.95, 'MACD (%d, %d, %d)'%(nfast, nslow, nema), va='top', + transform=ax3.transAxes, fontsize=textsize) + +#ax3.set_yticks([]) +# turn off upper axis tick labels, rotate the lower ones, etc +for ax in ax1, ax2, ax2t, ax3: + if ax!=ax3: + for label in ax.get_xticklabels(): + label.set_visible(False) + else: + for label in ax.get_xticklabels(): + label.set_rotation(30) + label.set_horizontalalignment('right') + + ax.fmt_xdata = mdates.DateFormatter('%Y-%m-%d') + + + +class MyLocator(mticker.MaxNLocator): + def __init__(self, *args, **kwargs): + 
mticker.MaxNLocator.__init__(self, *args, **kwargs) + + def __call__(self, *args, **kwargs): + return mticker.MaxNLocator.__call__(self, *args, **kwargs) + +# at most 5 ticks, pruning the upper and lower so they don't overlap +# with other ticks +#ax2.yaxis.set_major_locator(mticker.MaxNLocator(5, prune='both')) +#ax3.yaxis.set_major_locator(mticker.MaxNLocator(5, prune='both')) + +ax2.yaxis.set_major_locator(MyLocator(5, prune='both')) +ax3.yaxis.set_major_locator(MyLocator(5, prune='both')) + +plt.show() +``` + + +![png](output_43_0.png) + + +## basemap 画地图 + +需要安装 `basemap` 包: + + +```python +import matplotlib.pyplot as plt +import numpy as np + +try: + from mpl_toolkits.basemap import Basemap + have_basemap = True +except ImportError: + have_basemap = False + + +def plotmap(): + # create figure + fig = plt.figure(figsize=(8,8)) + # set up orthographic map projection with + # perspective of satellite looking down at 50N, 100W. + # use low resolution coastlines. + map = Basemap(projection='ortho',lat_0=50,lon_0=-100,resolution='l') + # lat/lon coordinates of five cities. + lats=[40.02,32.73,38.55,48.25,17.29] + lons=[-105.16,-117.16,-77.00,-114.21,-88.10] + cities=['Boulder, CO','San Diego, CA', + 'Washington, DC','Whitefish, MT','Belize City, Belize'] + # compute the native map projection coordinates for cities. + xc,yc = map(lons,lats) + # make up some data on a regular lat/lon grid. + nlats = 73; nlons = 145; delta = 2.*np.pi/(nlons-1) + lats = (0.5*np.pi-delta*np.indices((nlats,nlons))[0,:,:]) + lons = (delta*np.indices((nlats,nlons))[1,:,:]) + wave = 0.75*(np.sin(2.*lats)**8*np.cos(4.*lons)) + mean = 0.5*np.cos(2.*lats)*((np.sin(2.*lats))**2 + 2.) + # compute native map projection coordinates of lat/lon grid. 
+ # (convert lons and lats to degrees first) + x, y = map(lons*180./np.pi, lats*180./np.pi) + # draw map boundary + map.drawmapboundary(color="0.9") + # draw graticule (latitude and longitude grid lines) + map.drawmeridians(np.arange(0,360,30),color="0.9") + map.drawparallels(np.arange(-90,90,30),color="0.9") + # plot filled circles at the locations of the cities. + map.plot(xc,yc,'wo') + # plot the names of five cities. + for name,xpt,ypt in zip(cities,xc,yc): + plt.text(xpt+100000,ypt+100000,name,fontsize=9,color='w') + # contour data over the map. + cs = map.contour(x,y,wave+mean,15,linewidths=1.5) + # draw blue marble image in background. + # (downsample the image by 50% for speed) + map.bluemarble(scale=0.5) + +def plotempty(): + # create figure + fig = plt.figure(figsize=(8,8)) + fig.text(0.5, 0.5, "Sorry, could not import Basemap", + horizontalalignment='center') + +if have_basemap: + plotmap() +else: + plotempty() +plt.show() + +``` + + +![png](output_46_0.png) + + +## 对数图 + +`loglog, semilogx, semilogy, errorbar` 函数: + + +```python +import numpy as np +import matplotlib.pyplot as plt + +plt.subplots_adjust(hspace=0.4) +t = np.arange(0.01, 20.0, 0.01) + +# log y axis +plt.subplot(221) +plt.semilogy(t, np.exp(-t/5.0)) +plt.title('semilogy') +plt.grid(True) + +# log x axis +plt.subplot(222) +plt.semilogx(t, np.sin(2*np.pi*t)) +plt.title('semilogx') +plt.grid(True) + +# log x and y axis +plt.subplot(223) +plt.loglog(t, 20*np.exp(-t/10.0), basex=2) +plt.grid(True) +plt.title('loglog base 4 on x') + +# with errorbars: clip non-positive values +ax = plt.subplot(224) +ax.set_xscale("log", nonposx='clip') +ax.set_yscale("log", nonposy='clip') + +x = 10.0**np.linspace(0.0, 2.0, 20) +y = x**2.0 +plt.errorbar(x, y, xerr=0.1*x, yerr=5.0+0.75*y) +ax.set_ylim(ymin=0.1) +ax.set_title('Errorbars go negative') + + +plt.show() +``` + + +![png](output_49_0.png) + + +## 极坐标 + +设置 `polar=True`: + + +```python +import numpy as np +import matplotlib.pyplot as plt + + +r = 
np.arange(0, 3.0, 0.01) +theta = 2 * np.pi * r + +ax = plt.subplot(111, polar=True) +ax.plot(theta, r, color='r', linewidth=3) +ax.set_rmax(2.0) +ax.grid(True) + +ax.set_title("A line plot on a polar axis", va='bottom') +plt.show() +``` + + +![png](output_52_0.png) + + +## 标注 + +`legend` 函数: + + +```python +import numpy as np +import matplotlib.pyplot as plt + +# Make some fake data. +a = b = np.arange(0,3, .02) +c = np.exp(a) +d = c[::-1] + +# Create plots with pre-defined labels. +plt.plot(a, c, 'k--', label='Model length') +plt.plot(a, d, 'k:', label='Data length') +plt.plot(a, c+d, 'k', label='Total message length') + +legend = plt.legend(loc='upper center', shadow=True, fontsize='x-large') + +# Put a nicer background color on the legend. +legend.get_frame().set_facecolor('#00FFCC') + +plt.show() +``` + + +![png](output_55_0.png) + + +## 数学公式 + + +```python +from __future__ import print_function +import matplotlib.pyplot as plt +import os +import sys +import re +import gc + +# Selection of features following "Writing mathematical expressions" tutorial +mathtext_titles = { + 0: "Header demo", + 1: "Subscripts and superscripts", + 2: "Fractions, binomials and stacked numbers", + 3: "Radicals", + 4: "Fonts", + 5: "Accents", + 6: "Greek, Hebrew", + 7: "Delimiters, functions and Symbols"} +n_lines = len(mathtext_titles) + +# Randomly picked examples +mathext_demos = { + 0: r"$W^{3\beta}_{\delta_1 \rho_1 \sigma_2} = " + r"U^{3\beta}_{\delta_1 \rho_1} + \frac{1}{8 \pi 2} " + r"\int^{\alpha_2}_{\alpha_2} d \alpha^\prime_2 \left[\frac{ " + r"U^{2\beta}_{\delta_1 \rho_1} - \alpha^\prime_2U^{1\beta}_" + r"{\rho_1 \sigma_2} }{U^{0\beta}_{\rho_1 \sigma_2}}\right]$", + + 1: r"$\alpha_i > \beta_i,\ " + r"\alpha_{i+1}^j = {\rm sin}(2\pi f_j t_i) e^{-5 t_i/\tau},\ " + r"\ldots$", + + 2: r"$\frac{3}{4},\ \binom{3}{4},\ \stackrel{3}{4},\ " + r"\left(\frac{5 - \frac{1}{x}}{4}\right),\ \ldots$", + + 3: r"$\sqrt{2},\ \sqrt[3]{x},\ \ldots$", + + 4: r"$\mathrm{Roman}\ , \ 
\mathit{Italic}\ , \ \mathtt{Typewriter} \ " + r"\mathrm{or}\ \mathcal{CALLIGRAPHY}$", + + 5: r"$\acute a,\ \bar a,\ \breve a,\ \dot a,\ \ddot a, \ \grave a, \ " + r"\hat a,\ \tilde a,\ \vec a,\ \widehat{xyz},\ \widetilde{xyz},\ " + r"\ldots$", + + 6: r"$\alpha,\ \beta,\ \chi,\ \delta,\ \lambda,\ \mu,\ " + r"\Delta,\ \Gamma,\ \Omega,\ \Phi,\ \Pi,\ \Upsilon,\ \nabla,\ " + r"\aleph,\ \beth,\ \daleth,\ \gimel,\ \ldots$", + + 7: r"$\coprod,\ \int,\ \oint,\ \prod,\ \sum,\ " + r"\log,\ \sin,\ \approx,\ \oplus,\ \star,\ \varpropto,\ " + r"\infty,\ \partial,\ \Re,\ \leftrightsquigarrow, \ \ldots$"} + + +def doall(): + # Colors used in mpl online documentation. + mpl_blue_rvb = (191./255., 209./256., 212./255.) + mpl_orange_rvb = (202/255., 121/256., 0./255.) + mpl_grey_rvb = (51./255., 51./255., 51./255.) + + # Creating figure and axis. + plt.figure(figsize=(6, 7)) + plt.axes([0.01, 0.01, 0.98, 0.90], axisbg="white", frameon=True) + plt.gca().set_xlim(0., 1.) + plt.gca().set_ylim(0., 1.) + plt.gca().set_title("Matplotlib's math rendering engine", + color=mpl_grey_rvb, fontsize=14, weight='bold') + plt.gca().set_xticklabels("", visible=False) + plt.gca().set_yticklabels("", visible=False) + + # Gap between lines in axes coords + line_axesfrac = (1. / (n_lines)) + + # Plotting header demonstration formula + full_demo = mathext_demos[0] + plt.annotate(full_demo, + xy=(0.5, 1. - 0.59*line_axesfrac), + xycoords='data', color=mpl_orange_rvb, ha='center', + fontsize=20) + + # Plotting features demonstration formulae + for i_line in range(1, n_lines): + baseline = 1. - (i_line)*line_axesfrac + baseline_next = baseline - line_axesfrac*1. 
+ title = mathtext_titles[i_line] + ":" + fill_color = ['white', mpl_blue_rvb][i_line % 2] + plt.fill_between([0., 1.], [baseline, baseline], + [baseline_next, baseline_next], + color=fill_color, alpha=0.5) + plt.annotate(title, + xy=(0.07, baseline - 0.3*line_axesfrac), + xycoords='data', color=mpl_grey_rvb, weight='bold') + demo = mathext_demos[i_line] + plt.annotate(demo, + xy=(0.05, baseline - 0.75*line_axesfrac), + xycoords='data', color=mpl_grey_rvb, + fontsize=16) + + for i in range(n_lines): + s = mathext_demos[i] + print(i, s) + plt.show() + +if '--latex' in sys.argv: + # Run: python mathtext_examples.py --latex + # Need amsmath and amssymb packages. + fd = open("mathtext_examples.ltx", "w") + fd.write("\\documentclass{article}\n") + fd.write("\\usepackage{amsmath, amssymb}\n") + fd.write("\\begin{document}\n") + fd.write("\\begin{enumerate}\n") + + for i in range(n_lines): + s = mathext_demos[i] + s = re.sub(r"(?<!\\)\$", "$$", s) + fd.write("\\item %s\n" % s) + + fd.write("\\end{enumerate}\n") + fd.write("\\end{document}\n") + fd.close() + + os.system("pdflatex mathtext_examples.ltx") +else: + doall() +``` + + 0 $W^{3\beta}_{\delta_1 \rho_1 \sigma_2} = U^{3\beta}_{\delta_1 \rho_1} + \frac{1}{8 \pi 2} \int^{\alpha_2}_{\alpha_2} d \alpha^\prime_2 \left[\frac{ U^{2\beta}_{\delta_1 \rho_1} - \alpha^\prime_2U^{1\beta}_{\rho_1 \sigma_2} }{U^{0\beta}_{\rho_1 \sigma_2}}\right]$ + 1 $\alpha_i > \beta_i,\ \alpha_{i+1}^j = {\rm sin}(2\pi f_j t_i) e^{-5 t_i/\tau},\ \ldots$ + 2 $\frac{3}{4},\ \binom{3}{4},\ \stackrel{3}{4},\ \left(\frac{5 - \frac{1}{x}}{4}\right),\ \ldots$ + 3 $\sqrt{2},\ \sqrt[3]{x},\ \ldots$ + 4 $\mathrm{Roman}\ , \ \mathit{Italic}\ , \ \mathtt{Typewriter} \ \mathrm{or}\ \mathcal{CALLIGRAPHY}$ + 5 $\acute a,\ \bar a,\ \breve a,\ \dot a,\ \ddot a, \ \grave a, \ \hat a,\ \tilde a,\ \vec a,\ \widehat{xyz},\ \widetilde{xyz},\ \ldots$ + 6 $\alpha,\ \beta,\ \chi,\ \delta,\ \lambda,\ \mu,\ \Delta,\ \Gamma,\ \Omega,\ \Phi,\ \Pi,\ \Upsilon,\ \nabla,\ \aleph,\ \beth,\ \daleth,\ \gimel,\ \ldots$ + 7 $\coprod,\ \int,\ \oint,\ \prod,\ \sum,\ \log,\ \sin,\ \approx,\ \oplus,\ \star,\ \varpropto,\ \infty,\ \partial,\ \Re,\ \leftrightsquigarrow, \ \ldots$ + + + +![png](output_57_1.png) + diff --git a/docs/07-interfacing-with-other-languages/07.01-introduction.md b/docs/07-interfacing-with-other-languages/07.01-introduction.md new file mode 100644 index 00000000..a45399ca --- /dev/null +++ 
b/docs/07-interfacing-with-other-languages/07.01-introduction.md @@ -0,0 +1,28 @@ + +# 简介 + +## 使用 Python 和另一种语言混编的好处 + +至少有以下四个原因: + +1. `Best of both worlds` - 结合两种语言的优点:已经优化和测试过的代码库 + Python 的灵活 +- `Python as glue` - **Python** 作为连接的桥梁,将很多其他语言的模块结合到一个大型程序中 +- `Speed up Python` - 使用一个更快的语言帮助加速 **Python** +- `Division of labor` - 各司其职,让各个语言做各自更擅长的事情,例如 **Fortran** 进行数组计算,**Python** 处理测试,文件读写,文本处理,数据整理,GUI 生成,HTTP 服务等等。 + +## 语言扩展工具 + +### 打包已有的代码和其他语言的库 + +- 使用手写的扩展模块 +- `Cython` - **C/C++** +- `SWIG` - **C/C++** +- `f2py` - **Fortran** +- `ctypes` - 其他语言库 + +### 加速 Python + +- 使用手写的扩展模块 +- `Cython` +- `Weave` +- `Shedskin` 和其他模块 diff --git a/docs/07-interfacing-with-other-languages/07.02-python-extension-modules.md b/docs/07-interfacing-with-other-languages/07.02-python-extension-modules.md new file mode 100644 index 00000000..98a16182 --- /dev/null +++ b/docs/07-interfacing-with-other-languages/07.02-python-extension-modules.md @@ -0,0 +1,288 @@ + +# Python 扩展模块 + +## 简介 + +C Library | Interface | Python +---|---|--- +`c header`
`c implementation` | Wrapper `C` $\leftrightarrows$ `Python`
communication between `py + c` | `import fact`
`fact.fact(10)` + +**Python** 扩展模块将 `PyInt(10)` 转化为 `CInt(10)` 然后调用 `C` 程序中的 `fact()` 函数进行计算,再将返回的结果转换回 `PyInt`。 + +## 产生一个扩展模块 + +假设我们有这样的一个头文件和程序: + + +```python +%%file fact.h +#ifndef FACT_H +#define FACT_H +int fact(int n); +#endif +``` + + Writing fact.h + + + +```python +%%file fact.c +#include "fact.h" +int fact(int n) +{ + if (n <= 1) return 1; + else return n * fact(n - 1); +} +``` + + Writing fact.c + + +定义包装函数: + + +```python +%%file fact_wrap.c + +/* Must include Python.h before any standard headers*/ +#include <Python.h> +#include "fact.h" +static PyObject* wrap_fact(PyObject *self, PyObject *args) +{ + /* Python->C data conversion */ + int n, result; + // the string i here means there is only one integer + if (!PyArg_ParseTuple(args, "i", &n)) + return NULL; + + /* C Function Call */ + result = fact(n); + + /* C->Python data conversion */ + return Py_BuildValue("i", result); +} + +/* Method table declaring the names of functions exposed to Python*/ +static PyMethodDef ExampleMethods[] = { + {"fact", wrap_fact, METH_VARARGS, "Calculate the factorial of n"}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +/* Module initialization function called at "import example"*/ +PyMODINIT_FUNC +initexample(void) +{ + (void) Py_InitModule("example", ExampleMethods); +} +``` + + Writing fact_wrap.c + + +## 手动编译扩展模块 + +手动使用 `gcc` 编译,`Windows` 下如果没有 `gcc`,可以通过 `conda` 进行安装: + + conda install mingw4 + +`Windows 64-bit` 下编译需要加上 `-DMS_WIN64` 的选项,`include` 和 `lib` 文件夹的路径对应于本地 **Python** 安装的环境: + + +```python +!gcc -DMS_WIN64 -c fact.c fact_wrap.c -IC:\Miniconda\include +``` + + +```python +!gcc -DMS_WIN64 -shared fact.o fact_wrap.o -LC:\Miniconda\libs -lpython27 -o example.pyd +``` + +`Windows` 下最终生成的文件后缀为 `.pyd` , `Unix` 下生成的文件后缀名为 `.so`。 + +用法为: + +- `Windows 32-bit` +``` +gcc -c fact.c fact_wrap.c -I\include +gcc -shared fact.o fact_wrap.o -L\libs -lpython27 -o example.pyd +``` +- `Unix` +``` +gcc -c fact.c fact_wrap.c -I +gcc -shared fact.o fact_wrap.o -L\config -lpython27 -o 
example.so +``` + +编译完成后,我们就可以使用 `example` 这个模块了。 + +导入生成的包: + + +```python +import example +print dir(example) +``` + + ['__doc__', '__file__', '__name__', '__package__', 'fact'] + + +使用 `example` 中的函数: + + +```python +print 'factorial of 10:', example.fact(10) +``` + + factorial of 10: 3628800 + + +## 使用 setup.py 进行编译 + +清理刚才生成的文件: + + +```python +!rm -f example.pyd +``` + +写入 `setup.py`: + + +```python +%%file setup.py +from distutils.core import setup, Extension + +ext = Extension(name='example', sources=['fact_wrap.c', 'fact.c']) + +setup(name='example', ext_modules=[ext]) +``` + + Writing setup.py + + +使用 `distutils` 中的函数,我们进行 `build` 和 `install`: + + python setup.py build (--compiler=mingw64) + python setup.py install + +括号中的内容在 `windows` 中可能需要加上。 + +这里我们使用 `build_ext --inplace` 选项将其安装在本地文件夹: + + +```python +!python setup.py build_ext --inplace +``` + + running build_ext + building 'example' extension + creating build + creating build\temp.win-amd64-2.7 + creating build\temp.win-amd64-2.7\Release + C:\Miniconda\Scripts\gcc.bat -DMS_WIN64 -mdll -O -Wall -IC:\Miniconda\include -IC:\Miniconda\PC -c fact_wrap.c -o build\temp.win-amd64-2.7\Release\fact_wrap.o + C:\Miniconda\Scripts\gcc.bat -DMS_WIN64 -mdll -O -Wall -IC:\Miniconda\include -IC:\Miniconda\PC -c fact.c -o build\temp.win-amd64-2.7\Release\fact.o + writing build\temp.win-amd64-2.7\Release\example.def + C:\Miniconda\Scripts\gcc.bat -DMS_WIN64 -shared -s build\temp.win-amd64-2.7\Release\fact_wrap.o build\temp.win-amd64-2.7\Release\fact.o build\temp.win-amd64-2.7\Release\example.def -LC:\Miniconda\libs -LC:\Miniconda\PCbuild\amd64 -lpython27 -lmsvcr90 -o "C:\Users\Jin\Documents\Git\python-tutorial\07. 
interfacing with other languages\example.pyd" + + +## 使用编译的模块 + +进行测试: + + +```python +import example + +print 'factorial of 10:', example.fact(10) +``` + + factorial of 10: 3628800 + + +定义 `Python` 函数: + + +```python +def pyfact(n): + if n <= 1: return 1 + return n * pyfact(n-1) + +print pyfact(10) +print example.fact(10) +``` + + 3628800 + 3628800 + + +时间测试: + + +```python +%timeit example.fact(10) +``` + + The slowest run took 13.17 times longer than the fastest. This could mean that an intermediate result is being cached + 1000000 loops, best of 3: 213 ns per loop + + + +```python +%timeit pyfact(10) +``` + + 1000000 loops, best of 3: 1.43 µs per loop + + +如果使用 `fact` 计算比较大的值: + + +```python +example.fact(100) +``` + + + + + 0 + + + +会出现溢出的结果,因为 `int` 表示的值有限,但是 `pyfact` 不会有这样的问题: + + +```python +pyfact(100) +``` + + + + + 93326215443944152681699238856266700490715968264381621468592963895217599993229915608941463976156518286253697920827223758251185210916864000000000000000000000000L + + + +将生成的文件压缩到压缩文件中: + + +```python +import zipfile + +f = zipfile.ZipFile('07-02-example.zip','w',zipfile.ZIP_DEFLATED) + +names = 'fact.o fact_wrap.c fact_wrap.o example.pyd setup.py'.split() +for name in names: + f.write(name) + +f.close() +``` + +清理生成的文件: + + +```python +!rm -f fact*.* +!rm -f example.* +!rm -f setup*.* +!rm -rf build +``` diff --git a/docs/07-interfacing-with-other-languages/07.03-cython-part-1.md b/docs/07-interfacing-with-other-languages/07.03-cython-part-1.md new file mode 100644 index 00000000..b32f7da9 --- /dev/null +++ b/docs/07-interfacing-with-other-languages/07.03-cython-part-1.md @@ -0,0 +1,215 @@ + +# Cython:Cython 基础,将源代码转换成扩展模块 + +## Cython 基础 + +之前使用了手动的方法对 `C` 程序进行编译,而 `Cython` 则简化了这个过程。 + +考虑之前的斐波拉契数列,`Python` 版本: + +```python +def fib(n): + a,b = 1,1 + for i in range(n): + a,b = a+b, a + return a +``` + +`C` 版本: + +```cpp +int fib(int n) { + int tmp, i, a, b; + a = b = 1; + for (i=0; i + + + +调用这个函数: + + +```python +libc.printf("%s, %d\n", 
"hello", 5) +``` + + + + + 9 + + + +这里显示的 `9` 是 `printf` 的返回值,表示实际输出的字符个数(`hello, 5` 加上结尾的换行符 `'\n'` 共 9 个字符,不包括字符串结束符 `'\0'`),但是并没有显示结果,原因是 `printf` 函数默认是写在标准输出流上的,与 `IPython` 使用的输出流不一样,所以没有显示结果。 + +## C 数学库 + +找到数学库: + + +```python +libm_name = util.find_library('m') + +print libm_name +``` + + msvcr90.dll + + +调用 `atan2` 函数: + + +```python +libm = CDLL(libm_name) + +libm.atan2(1.0, 2.0) +``` + + + --------------------------------------------------------------------------- + + ArgumentError Traceback (most recent call last) + + in () + 1 libm = CDLL(libm_name) + 2 + ----> 3 libm.atan2(1.0, 2.0) + + + ArgumentError: argument 1: <type 'exceptions.TypeError'>: Don't know how to convert parameter 1 + + +调用这个函数出错,原因是我们需要进行一些额外工作,告诉 `Python` 函数的参数和返回值是什么样的: + + +```python +from ctypes import c_double + +libm.atan2.argtypes = [c_double, c_double] +libm.atan2.restype = c_double +``` + + +```python +libm.atan2(1.0, 2.0) +``` + + + + + 0.4636476090008061 + + + +与 `Python` 数学库中的结果一致: + + +```python +from math import atan2 +``` + + +```python +atan2(1.0, 2.0) +``` + + + + + 0.4636476090008061 + + + +## Numpy 和 ctypes + +假设我们有这样的一个函数: +```c +float _sum(float *vec, int len) { + float sum = 0.0; + int i; + for (i = 0; i < len; i++) { + sum += vec[i]; + } + return sum; +} +``` + +并且已经编译成动态链接库,那么我们可以这样调用: + +```python +from ctypes import c_float, CDLL, c_int +from numpy import array, float32 +from numpy.ctypeslib import ndpointer + +x = array([1,2,3,4], dtype=float32) + +lib = CDLL() + +ptr = ndpointer(float32, ndim=1, flags='C') +lib._sum.argtypes = [ptr, c_int] +lib._sum.restype = c_float + +result = lib._sum(x, len(x)) +``` diff --git a/docs/08-object-oriented-programming/08.01-oop-introduction.md b/docs/08-object-oriented-programming/08.01-oop-introduction.md new file mode 100644 index 00000000..07629b6f --- /dev/null +++ b/docs/08-object-oriented-programming/08.01-oop-introduction.md @@ -0,0 +1,103 @@ + +# 简介 + +## 属性 attributes + +属性是与对象绑定的一组数据,可以只读,只写,或者读写,使用时不加括号,例如: + + +```python +f = file("new_file", 'w') +``` + +显示模式属性: + + +```python 
+f.mode +``` + + + + + 'w' + + + +是否关闭: + + +```python +f.closed +``` + + + + + False + + + +`mode` 是只读属性,所以这样会报错: + + +```python +f.mode = 'r' +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + ----> 1 f.mode = 'r' + + + TypeError: readonly attribute + + +获取属性不需要加括号: + + +```python +f.mode() +``` + + + --------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + ----> 1 f.mode() + + + TypeError: 'str' object is not callable + + +## 方法 method + +方法是与对象绑定的一组函数,需要使用括号,作用于对象本身: + + +```python +f.write('Hi.\n') +f.seek(0) +f.write('Hola!\n') +f.close() +``` + + +```python +!rm new_file +``` + +## 使用 OOP 的原因 + +- 构建自己的类型来模拟真实世界的对象 +- 处理抽象对象 +- 容易复用和扩展 +- 理解其他 OOP 代码 +- GUI 通常使用 OOP 规则编写 +- ... diff --git a/docs/08-object-oriented-programming/08.02-using-oop-model-a-forest-fire.md b/docs/08-object-oriented-programming/08.02-using-oop-model-a-forest-fire.md new file mode 100644 index 00000000..5d2ad65e --- /dev/null +++ b/docs/08-object-oriented-programming/08.02-using-oop-model-a-forest-fire.md @@ -0,0 +1,171 @@ + +# 使用 OOP 对森林火灾建模 + + +```python +%matplotlib inline + +import matplotlib.pyplot as plt +import numpy as np +``` + +## 对森林建模 + + +```python +class Forest(object): + def __init__(self, size=(150, 150), p_sapling=0.0025, p_lightning=5.e-6, name=None): + self.size = size + self.trees = np.zeros(self.size, dtype=bool) + self.forest_fires = np.zeros(self.size, dtype=bool) + self.p_sapling = p_sapling + self.p_lightning = p_lightning + if name is not None: + self.name = name + else: + self.name = self.__class__.__name__ + + @property + def num_cells(self): + return self.size[0] * self.size[1] + + @property + def tree_fraction(self): + return self.trees.sum() / float(self.num_cells) + + @property + def fire_fraction(self): + return self.forest_fires.sum() / float(self.num_cells) + + def 
advance_one_step(self): + self.grow_trees() + self.start_fires() + self.burn_trees() + + def grow_trees(self): + growth_sites = self._rand_bool(self.p_sapling) + self.trees[growth_sites] = True + + def start_fires(self): + lightning_strikes = (self._rand_bool(self.p_lightning) & + self.trees) + self.forest_fires[lightning_strikes] = True + + def burn_trees(self): + fires = np.zeros((self.size[0] + 2, self.size[1] + 2), dtype=bool) + fires[1:-1, 1:-1] = self.forest_fires + north = fires[:-2, 1:-1] + south = fires[2:, 1:-1] + east = fires[1:-1, :-2] + west = fires[1:-1, 2:] + new_fires = (north | south | east | west) & self.trees + self.trees[self.forest_fires] = False + self.forest_fires = new_fires + + def _rand_bool(self, p): + return np.random.uniform(size=self.trees.shape) < p +``` + +定义一个森林类之后,我们创建一个新的森林类对象: + + +```python +forest = Forest() +``` + +显示当前的状态: + + +```python +print forest.trees +``` + + [[False False False ..., False False False] + [False False False ..., False False False] + [False False False ..., False False False] + ..., + [False False False ..., False False False] + [False False False ..., False False False] + [False False False ..., False False False]] + + + +```python +print forest.forest_fires +``` + + [[False False False ..., False False False] + [False False False ..., False False False] + [False False False ..., False False False] + ..., + [False False False ..., False False False] + [False False False ..., False False False] + [False False False ..., False False False]] + + +使用 `matshow` 进行可视化: + + +```python +plt.matshow(forest.trees, cmap=plt.cm.Greens) + +plt.show() +``` + + +![png](output_10_0.png) + + +## 模拟森林生长和火灾的过程 + +经过一段时间: + + +```python +forest.advance_one_step() +plt.matshow(forest.trees, cmap=plt.cm.Greens) +plt.show() +``` + + +![png](output_13_0.png) + + +循环很长时间: + + +```python +for i in range(500): + forest.advance_one_step() +plt.matshow(forest.trees, cmap=plt.cm.Greens) +print forest.tree_fraction +``` + + 
0.253111111111 + + + +![png](output_15_1.png) + + +迭代更长时间: + + +```python +forest = Forest() +tree_fractions = [] +for i in range(5000): + forest.advance_one_step() + tree_fractions.append(forest.tree_fraction) +fig = plt.figure() +ax0 = fig.add_subplot(1,2,1) +ax0.matshow(forest.trees, cmap=plt.cm.Greens) +ax1 = fig.add_subplot(1,2,2) +ax1.plot(tree_fractions) + +plt.show() +``` + + +![png](output_17_0.png) + diff --git a/docs/08-object-oriented-programming/08.03-what-is-a-object.md b/docs/08-object-oriented-programming/08.03-what-is-a-object.md new file mode 100644 index 00000000..82466ff4 --- /dev/null +++ b/docs/08-object-oriented-programming/08.03-what-is-a-object.md @@ -0,0 +1,153 @@ + +# 什么是对象? + +在 `Python` 中,几乎所有的东西都是对象。 + +整数是对象: + + +```python +a = 257 +``` + + +```python +type(a) +``` + + + + + int + + + + +```python +id(a) +``` + + + + + 53187032L + + + +`b` 和 `a` 是同一个对象: + + +```python +b = a +``` + + +```python +id(b) +``` + + + + + 53187032L + + + + +```python +c = 258 +id(c) +``` + + + + + 53186960L + + + +函数: + + +```python +def foo(): + print 'hi' +``` + + +```python +type(foo) +``` + + + + + function + + + + +```python +id(foo) +``` + + + + + 63632664L + + + +`type` 函数本身也是对象: + + +```python +type(type) +``` + + + + + type + + + + +```python +id(type) +``` + + + + + 506070640L + + + +只有一些保留的关键词不是对象: + + +```python +id(if) +``` + + + File "", line 1 + id(if) + ^ + SyntaxError: invalid syntax + + + + +```python +id(+) +``` + + + File "", line 1 + id(+) + ^ + SyntaxError: invalid syntax + + diff --git a/docs/08-object-oriented-programming/08.04-writing-classes.md b/docs/08-object-oriented-programming/08.04-writing-classes.md new file mode 100644 index 00000000..966cc5fb --- /dev/null +++ b/docs/08-object-oriented-programming/08.04-writing-classes.md @@ -0,0 +1,142 @@ + +# 定义 class + +## 基本形式 + +`class` 定义如下: +```python +class ClassName(ParentClass): + """class docstring""" + def method(self): + return +``` + +- `class` 关键词在最前面 +- `ClassName` 通常采用 
`CamelCase` 记法 +- 括号中的 `ParentClass` 用来表示继承关系 +- 冒号不能缺少 +- `""""""` 中的内容表示 `docstring`,可以省略 +- 方法定义与函数定义十分类似,不过多了一个 `self` 参数表示这个对象本身 +- `class` 中的方法要进行缩进 + + +``` +class Forest(object): + """ Forest can grow trees which eventually die.""" + pass +``` + +其中 `object` 是最基本的类型。 + +查看帮助: + + +``` +import numpy as np +np.info(Forest) +``` + + Forest() + + Forest can grow trees which eventually die. + + + Methods: + + + + +``` +forest = Forest() +``` + + +``` +forest +``` + + + + + <__main__.Forest at 0x3cda358> + + + +## 添加方法和属性 + +可以直接添加属性(有更好的替代方式): + + +``` +forest.trees = np.zeros((150, 150), dtype=bool) +``` + + +``` +forest.trees +``` + + + + + array([[False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + ..., + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False]], dtype=bool) + + + + +``` +forest2 = Forest() +``` + +`forest2` 没有这个属性: + + +``` +forest2.trees +``` + + + --------------------------------------------------------------------------- + + AttributeError Traceback (most recent call last) + + in () + ----> 1 forest2.trees + + + AttributeError: 'Forest' object has no attribute 'trees' + + +添加方法时,默认第一个参数是对象本身,一般为 `self`,可能用到也可能用不到,然后才是其他的参数: + + +``` +class Forest(object): + """ Forest can grow trees which eventually die.""" + def grow(self): + print "the tree is growing!" + + def number(self, num=1): + if num == 1: + print 'there is 1 tree.' + else: + print 'there are', num, 'trees.' +``` + + +``` +forest = Forest() + +forest.grow() +forest.number(12) +``` + + the tree is growing! + there are 12 trees. 
+ diff --git a/docs/08-object-oriented-programming/08.05-special-method.md b/docs/08-object-oriented-programming/08.05-special-method.md new file mode 100644 index 00000000..3a685fc2 --- /dev/null +++ b/docs/08-object-oriented-programming/08.05-special-method.md @@ -0,0 +1,284 @@ + +# 特殊方法 + +**Python** 使用 `__` 开头的名字来定义特殊的方法和属性,它们有: + +- `__init__()` +- `__repr__()` +- `__str__()` +- `__call__()` +- `__iter__()` +- `__add__()` +- `__sub__()` +- `__mul__()` +- `__rmul__()` +- `__class__` +- `__name__` + +## 构造方法 `__init__()` + +之前说到,在产生对象之后,我们可以向对象中添加属性。事实上,还可以通过构造方法,在构造对象的时候直接添加属性: + + +```python +class Leaf(object): + """ + A leaf falling in the woods. + """ + def __init__(self, color='green'): + self.color = color +``` + +默认属性值: + + +```python +leaf1 = Leaf() + +print leaf1.color +``` + + green + + +传入有参数的值: + + +```python +leaf2 = Leaf('orange') + +print leaf2.color +``` + + orange + + +回到森林的例子: + + +```python +import numpy as np + +class Forest(object): + """ Forest can grow trees which eventually die.""" + def __init__(self): + self.trees = np.zeros((150,150), dtype=bool) + self.fires = np.zeros((150,150), dtype=bool) +``` + +我们在构造方法中定义了两个属性 `trees` 和 `fires`: + + +```python +forest = Forest() + +forest.trees +``` + + + + + array([[False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + ..., + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False]], dtype=bool) + + + + +```python +forest.fires +``` + + + + + array([[False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + ..., + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False]], dtype=bool) + + + +修改属性的值: + + +```python 
+forest.trees[0,0]=True +forest.trees +``` + + + + + array([[ True, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + ..., + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False]], dtype=bool) + + + +改变它的属性值不会影响其他对象的属性值: + + +```python +forest2 = Forest() + +forest2.trees +``` + + + + + array([[False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + ..., + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False], + [False, False, False, ..., False, False, False]], dtype=bool) + + + +事实上,`__new__()` 才是真正产生新对象的方法,`__init__()` 只是对对象进行了初始化,所以: + +```python +leaf = Leaf() +``` + +相当于 + +```python +my_new_leaf = Leaf.__new__(Leaf) +Leaf.__init__(my_new_leaf) +leaf = my_new_leaf +``` + +## 表示方法 `__repr__()` 和 `__str__()` + + +```python +class Leaf(object): + """ + A leaf falling in the woods. + """ + def __init__(self, color='green'): + self.color = color + def __str__(self): + "This is the string that is printed." + return "A {} leaf".format(self.color) + def __repr__(self): + "This string recreates the object." 
+ return "{}(color='{}')".format(self.__class__.__name__, self.color) +``` + +`__str__()` 是使用 `print` 函数显示的结果: + + +```python +leaf = Leaf() + +print leaf +``` + + A green leaf + + +`__repr__()` 返回的是不使用 `print` 方法的结果: + + +```python +leaf +``` + + + + + Leaf(color='green') + + + +回到森林的例子: + + +```python +import numpy as np + +class Forest(object): + """ Forest can grow trees which eventually die.""" + def __init__(self, size=(150,150)): + self.size = size + self.trees = np.zeros(self.size, dtype=bool) + self.fires = np.zeros((self.size), dtype=bool) + + def __repr__(self): + my_repr = "{}(size={})".format(self.__class__.__name__, self.size) + return my_repr + + def __str__(self): + return self.__class__.__name__ +``` + + +```python +forest = Forest() +``` + +`__str__()` 方法: + + +```python +print forest +``` + + Forest + + +`__repr__()` 方法: + + +```python +forest +``` + + + + + Forest(size=(150, 150)) + + + +`__name__` 和 `__class__` 为特殊的属性: + + +```python +forest.__class__ +``` + + + + + __main__.Forest + + + + +```python +forest.__class__.__name__ +``` + + + + + 'Forest' + + diff --git a/docs/08-object-oriented-programming/08.06-properties.md b/docs/08-object-oriented-programming/08.06-properties.md new file mode 100644 index 00000000..5a5ea086 --- /dev/null +++ b/docs/08-object-oriented-programming/08.06-properties.md @@ -0,0 +1,242 @@ + +# 属性 + +## 只读属性 + +只读属性,顾名思义,指的是只可读不可写的属性,之前我们定义的属性都是可读可写的,对于只读属性,我们需要使用 `@property` 修饰符来得到: + + +```python +class Leaf(object): + def __init__(self, mass_mg): + self.mass_mg = mass_mg + + # 这样 mass_oz 就变成属性了 + @property + def mass_oz(self): + return self.mass_mg * 3.53e-5 +``` + +这里 `mass_oz` 就是一个只读不写的属性(注意是属性不是方法),而 `mass_mg` 是可读写的属性: + + +```python +leaf = Leaf(200) + +print leaf.mass_oz +``` + + 0.00706 + + +可以修改 `mass_mg` 属性来改变 `mass_oz`: + + +```python +leaf.mass_mg = 150 + +print leaf.mass_oz +``` + + 0.005295 + + +是属性不是方法: + + +```python +leaf.mass_oz() +``` + + + 
--------------------------------------------------------------------------- + + TypeError Traceback (most recent call last) + + in () + ----> 1 leaf.mass_oz() + + + TypeError: 'float' object is not callable + + +而且是只读属性,不可写: + + +```python +leaf.mass_oz = 0.001 +``` + + + --------------------------------------------------------------------------- + + AttributeError Traceback (most recent call last) + + in () + ----> 1 leaf.mass_oz = 0.001 + + + AttributeError: can't set attribute + + +回到 `forest` 的例子,我们希望加入几个只读属性: + + +```python +import numpy as np + +class Forest(object): + """ Forest can grow trees which eventually die.""" + def __init__(self, size=(150,150)): + self.size = size + self.trees = np.zeros(self.size, dtype=bool) + self.fires = np.zeros((self.size), dtype=bool) + + def __repr__(self): + my_repr = "{}(size={})".format(self.__class__.__name__, self.size) + return my_repr + + def __str__(self): + return self.__class__.__name__ + + @property + def num_cells(self): + """Number of cells available for growing trees""" + return np.prod(self.size) + + @property + def tree_fraction(self): + """ + Fraction of trees + """ + num_trees = self.trees.sum() + return float(num_trees) / self.num_cells + + @property + def fire_fraction(self): + """ + Fraction of fires + """ + num_fires = self.fires.sum() + return float(num_fires) / self.num_cells +``` + +查看属性: + + +```python +forest = Forest() + +forest.num_cells +``` + + + + + 22500 + + + +生成一个较小的森林: + + +```python +small_forest = Forest((10, 10)) +small_forest.num_cells +``` + + + + + 100 + + + +初始状态下,树和火灾的比例都是 0: + + +```python +small_forest.tree_fraction +``` + + + + + 0.0 + + + + +```python +small_forest.fire_fraction +``` + + + + + 0.0 + + + +## 可读写的属性 + +对于 `@property` 生成的只读属性,我们可以使用相应的 `@attr.setter` 修饰符来使得这个属性变成可写的: + + +```python +class Leaf(object): + def __init__(self, mass_mg): + self.mass_mg = mass_mg + + # 这样 mass_oz 就变成属性了 + @property + def mass_oz(self): + return self.mass_mg * 3.53e-5 + + # 使用 
mass_oz.setter 修饰符 + @mass_oz.setter + def mass_oz(self, m_oz): + self.mass_mg = m_oz / 3.53e-5 +``` + +测试: + + +```python +leaf = Leaf(200) +print leaf.mass_oz + +leaf.mass_mg = 150 +print leaf.mass_oz +``` + + 0.00706 + 0.005295 + + +修改 `mass_oz` 属性: + + +```python +leaf.mass_oz = 0.01 +print leaf.mass_mg +``` + + 283.28611898 + + +一个等价的替代如下: + +```python +class Leaf(object): + def __init__(self, mass_mg): + self.mass_mg = mass_mg + + def get_mass_oz(self): + return self.mass_mg * 3.53e-5 + + def set_mass_oz(self, m_oz): + self.mass_mg = m_oz / 3.53e-5 + + mass_oz = property(get_mass_oz, set_mass_oz) +``` diff --git a/docs/08-object-oriented-programming/08.07-forest-fire-simulation.md b/docs/08-object-oriented-programming/08.07-forest-fire-simulation.md new file mode 100644 index 00000000..fc1b1e7a --- /dev/null +++ b/docs/08-object-oriented-programming/08.07-forest-fire-simulation.md @@ -0,0 +1,227 @@ + +# 森林火灾模拟 + +之前我们已经构建好了一些基础,但是还没有开始对火灾进行模拟。 + +## 随机生长 + +- 在原来的基础上,我们要先让树生长,即定义 `grow_trees()` 方法 +- 定义方法之前,我们要先指定两个属性: + - 每个位置随机生长出树木的概率 + - 每个位置随机被闪电击中的概率 +- 为了方便,我们定义一个辅助函数来生成随机 `bool` 矩阵,大小与森林大小一致 +- 按照给定的生长概率生成生长的位置,将 `trees` 中相应位置设为 `True` + + +```python +import numpy as np + +class Forest(object): + """ Forest can grow trees which eventually die.""" + def __init__(self, size=(150,150), p_sapling=0.0025, p_lightning=5.0e-6): + self.size = size + self.trees = np.zeros(self.size, dtype=bool) + self.fires = np.zeros((self.size), dtype=bool) + self.p_sapling = p_sapling + self.p_lightning = p_lightning + + def __repr__(self): + my_repr = "{}(size={})".format(self.__class__.__name__, self.size) + return my_repr + + def __str__(self): + return self.__class__.__name__ + + @property + def num_cells(self): + """Number of cells available for growing trees""" + return np.prod(self.size) + + @property + def tree_fraction(self): + """ + Fraction of trees + """ + num_trees = self.trees.sum() + return float(num_trees) / self.num_cells + + @property + def 
fire_fraction(self): + """ + Fraction of fires + """ + num_fires = self.fires.sum() + return float(num_fires) / self.num_cells + + def _rand_bool(self, p): + """ + Random boolean distributed according to p, less than p will be True + """ + return np.random.uniform(size=self.trees.shape) < p + + def grow_trees(self): + """ + Growing trees. + """ + growth_sites = self._rand_bool(self.p_sapling) + self.trees[growth_sites] = True +``` + +测试: + + +```python +forest = Forest() +print forest.tree_fraction + +forest.grow_trees() +print forest.tree_fraction +``` + + 0.0 + 0.00293333333333 + + +## 火灾模拟 + +- 定义 `start_fires()`: + - 按照给定的概率生成被闪电击中的位置 + - 如果闪电击中的位置有树,那么将其设为着火点 +- 定义 `burn_trees()`: + - 如果一棵树的上下左右有火,那么这棵树也会着火 +- 定义 `advance_one_step()`: + - 进行一次生长,起火,燃烧 + + +```python +import numpy as np + +class Forest(object): + """ Forest can grow trees which eventually die.""" + def __init__(self, size=(150,150), p_sapling=0.0025, p_lightning=5.0e-6): + self.size = size + self.trees = np.zeros(self.size, dtype=bool) + self.fires = np.zeros((self.size), dtype=bool) + self.p_sapling = p_sapling + self.p_lightning = p_lightning + + def __repr__(self): + my_repr = "{}(size={})".format(self.__class__.__name__, self.size) + return my_repr + + def __str__(self): + return self.__class__.__name__ + + @property + def num_cells(self): + """Number of cells available for growing trees""" + return np.prod(self.size) + + @property + def tree_fraction(self): + """ + Fraction of trees + """ + num_trees = self.trees.sum() + return float(num_trees) / self.num_cells + + @property + def fire_fraction(self): + """ + Fraction of fires + """ + num_fires = self.fires.sum() + return float(num_fires) / self.num_cells + + def _rand_bool(self, p): + """ + Random boolean distributed according to p, less than p will be True + """ + return np.random.uniform(size=self.trees.shape) < p + + def grow_trees(self): + """ + Growing trees. 
+ """ + growth_sites = self._rand_bool(self.p_sapling) + self.trees[growth_sites] = True + + def start_fires(self): + """ + Start of fire. + """ + lightning_strikes = (self._rand_bool(self.p_lightning) & + self.trees) + self.fires[lightning_strikes] = True + + def burn_trees(self): + """ + Burn trees. + """ + fires = np.zeros((self.size[0] + 2, self.size[1] + 2), dtype=bool) + fires[1:-1, 1:-1] = self.fires + north = fires[:-2, 1:-1] + south = fires[2:, 1:-1] + east = fires[1:-1, :-2] + west = fires[1:-1, 2:] + new_fires = (north | south | east | west) & self.trees + self.trees[self.fires] = False + self.fires = new_fires + + def advance_one_step(self): + """ + Advance one step + """ + self.grow_trees() + self.start_fires() + self.burn_trees() +``` + + +```python +forest = Forest() + +for i in range(100): + forest.advance_one_step() +``` + +使用 `matshow()` 显示树木图像: + + +```python +import matplotlib.pyplot as plt +from matplotlib import cm + +%matplotlib inline + +plt.matshow(forest.trees, cmap=cm.Greens) + +plt.show() +``` + + +![png](output_12_0.png) + + +查看不同着火概率下的森林覆盖率趋势变化: + + +```python +forest = Forest() +forest2 = Forest(p_lightning=5e-4) + +tree_fractions = [] + +for i in range(2500): + forest.advance_one_step() + forest2.advance_one_step() + tree_fractions.append((forest.tree_fraction, forest2.tree_fraction)) + +plt.plot(tree_fractions) + +plt.show() +``` + + +![png](output_14_0.png) + diff --git a/docs/08-object-oriented-programming/08.08-inheritance.md b/docs/08-object-oriented-programming/08.08-inheritance.md new file mode 100644 index 00000000..2f94a132 --- /dev/null +++ b/docs/08-object-oriented-programming/08.08-inheritance.md @@ -0,0 +1,121 @@ + +# 继承 + +一个类定义的基本形式如下: +```python +class ClassName(ParentClass): + """class docstring""" + def method(self): + return +``` + +- `class` 关键词在最前面 +- `ClassName` 通常采用 `CamelCase` 记法 +- 括号中的 `ParentClass` 用来表示继承关系 +- 冒号不能缺少 +- `""""""` 中的内容表示 `docstring`,可以省略 +- 方法定义与函数定义十分类似,不过多了一个 `self` 参数表示这个对象本身 +- `class` 
中的方法要进行缩进 + +在里面有一个 `ParentClass` 项,用来进行继承,被继承的类是父类,定义的这个类是子类。 +对于子类来说,继承意味着它可以使用所有父类的方法和属性,同时还可以定义自己特殊的方法和属性。 + +假设我们有这样一个父类: + + +```python +class Leaf(object): + def __init__(self, color="green"): + self.color = color + def fall(self): + print "Splat!" +``` + +测试: + + +```python +leaf = Leaf() + +print leaf.color +``` + + green + + + +```python +leaf.fall() +``` + + Splat! + + +现在定义一个子类,继承自 `Leaf`: + + +```python +class MapleLeaf(Leaf): + def change_color(self): + if self.color == "green": + self.color = "red" +``` + +继承父类的所有方法: + + +```python +mleaf = MapleLeaf() + +print mleaf.color +``` + + green + + + +```python +mleaf.fall() +``` + + Splat! + + +但是有自己独有的方法,父类中没有: + + +```python +mleaf.change_color() + +print mleaf.color +``` + + red + + +如果想对父类的方法进行修改,只需要在子类中重定义这个方法即可: + + +```python +class MapleLeaf(Leaf): + def change_color(self): + if self.color == "green": + self.color = "red" + def fall(self): + self.change_color() + print "Plunk!" +``` + + +```python +mleaf = MapleLeaf() + +print mleaf.color +mleaf.fall() +print mleaf.color +``` + + green + Plunk! + red + diff --git a/docs/08-object-oriented-programming/08.09-super.md b/docs/08-object-oriented-programming/08.09-super.md new file mode 100644 index 00000000..eb1658c2 --- /dev/null +++ b/docs/08-object-oriented-programming/08.09-super.md @@ -0,0 +1,249 @@ + +# super() 函数 + + super(CurrentClassName, instance) + +返回该类实例对应的父类对象。 + + +```python +class Leaf(object): + def __init__(self, color="green"): + self.color = color + def fall(self): + print "Splat!" + +class MapleLeaf(Leaf): + def change_color(self): + if self.color == "green": + self.color = "red" + def fall(self): + self.change_color() + super(MapleLeaf, self).fall() +``` + +这里,我们先改变树叶的颜色,然后再找到这个实例对应的父类,并调用父类的 `fall()` 方法: + + +```python +mleaf = MapleLeaf() + +print mleaf.color +mleaf.fall() +print mleaf.color +``` + + green + Splat! 
+ red + + +回到我们的森林例子,这里我们将森林 `Forest` 作为父类,并定义一个子类 `BurnableForest`: + + +```python +import numpy as np + +class Forest(object): + """ Forest can grow trees which eventually die.""" + def __init__(self, size=(150,150), p_sapling=0.0025): + self.size = size + self.trees = np.zeros(self.size, dtype=bool) + self.p_sapling = p_sapling + + def __repr__(self): + my_repr = "{}(size={})".format(self.__class__.__name__, self.size) + return my_repr + + def __str__(self): + return self.__class__.__name__ + + @property + def num_cells(self): + """Number of cells available for growing trees""" + return np.prod(self.size) + + @property + def tree_fraction(self): + """ + Fraction of trees + """ + num_trees = self.trees.sum() + return float(num_trees) / self.num_cells + + def _rand_bool(self, p): + """ + Random boolean distributed according to p, less than p will be True + """ + return np.random.uniform(size=self.trees.shape) < p + + def grow_trees(self): + """ + Growing trees. + """ + growth_sites = self._rand_bool(self.p_sapling) + self.trees[growth_sites] = True + + def advance_one_step(self): + """ + Advance one step + """ + self.grow_trees() +``` + +- 将与燃烧相关的属性都被转移到了子类中去。 +- 修改两类的构造方法,将闪电概率放到子类的构造方法上,同时在子类的构造方法中,用 `super` 调用父类的构造方法。 +- 修改 `advance_one_step()`,父类中只进行生长,在子类中用 `super` 调用父类的 `advance_one_step()` 方法,并添加燃烧的部分。 + + +```python +class BurnableForest(Forest): + """ + Burnable forest support fires + """ + def __init__(self, p_lightning=5.0e-6, **kwargs): + super(BurnableForest, self).__init__(**kwargs) + self.p_lightning = p_lightning + self.fires = np.zeros((self.size), dtype=bool) + + def advance_one_step(self): + """ + Advance one step + """ + super(BurnableForest, self).advance_one_step() + self.start_fires() + self.burn_trees() + + @property + def fire_fraction(self): + """ + Fraction of fires + """ + num_fires = self.fires.sum() + return float(num_fires) / self.num_cells + + def start_fires(self): + """ + Start of fire. 
+ """ + lightning_strikes = (self._rand_bool(self.p_lightning) & + self.trees) + self.fires[lightning_strikes] = True + + def burn_trees(self): + """ + Burn trees. + """ + fires = np.zeros((self.size[0] + 2, self.size[1] + 2), dtype=bool) + fires[1:-1, 1:-1] = self.fires + north = fires[:-2, 1:-1] + south = fires[2:, 1:-1] + east = fires[1:-1, :-2] + west = fires[1:-1, 2:] + new_fires = (north | south | east | west) & self.trees + self.trees[self.fires] = False + self.fires = new_fires +``` + +测试父类: + + +```python +forest = Forest() + +forest.grow_trees() + +print forest.tree_fraction +``` + + 0.00284444444444 + + +测试子类: + + +```python +burnable_forest = BurnableForest() +``` + +调用自己和父类的方法: + + +```python +burnable_forest.grow_trees() +burnable_forest.start_fires() +burnable_forest.burn_trees() +print burnable_forest.tree_fraction +``` + + 0.00235555555556 + + +查看变化: + + +```python +import matplotlib.pyplot as plt + +%matplotlib inline + +forest = Forest() +forest2 = BurnableForest() + +tree_fractions = [] + +for i in range(2500): + forest.advance_one_step() + forest2.advance_one_step() + tree_fractions.append((forest.tree_fraction, forest2.tree_fraction)) + +plt.plot(tree_fractions) + +plt.show() +``` + + +![png](output_16_0.png) + + +`__str__` 和 `__repr__` 中 `self.__class__` 会根据类型不同而不同: + + +```python +forest +``` + + + + + Forest(size=(150, 150)) + + + + +```python +forest2 +``` + + + + + BurnableForest(size=(150, 150)) + + + + +```python +print forest +``` + + Forest + + + +```python +print forest2 +``` + + BurnableForest + diff --git a/docs/08-object-oriented-programming/08.10-refactoring-the-forest-fire-simutation.md b/docs/08-object-oriented-programming/08.10-refactoring-the-forest-fire-simutation.md new file mode 100644 index 00000000..9ebd1597 --- /dev/null +++ b/docs/08-object-oriented-programming/08.10-refactoring-the-forest-fire-simutation.md @@ -0,0 +1,163 @@ + +# 重定义森林火灾模拟 + +在前面的例子中,我们定义了一个 `BurnableForest`,实现了一个循序渐进的生长和燃烧过程。 + 
+假设我们现在想要定义一个立即燃烧的过程(每次着火之后燃烧到不能燃烧为止,之后再生长,而不是每次只燃烧周围的一圈树木),由于燃烧过程不同,我们需要从 `BurnableForest` 中派生出两个新的子类 `SlowBurnForest`(原来的燃烧过程) 和 `InstantBurnForest`,为此: + +- 将 `BurnableForest` 中的 `burn_trees()` 方法改写,不做任何操作,直接 `pass`(因为在 `advance_one_step()` 中调用了它,所以不能直接去掉) +- 在两个子类中定义新的 `burn_trees()` 方法。 + + +```python +import numpy as np +from scipy.ndimage.measurements import label + +class Forest(object): + """ Forest can grow trees which eventually die.""" + def __init__(self, size=(150,150), p_sapling=0.0025): + self.size = size + self.trees = np.zeros(self.size, dtype=bool) + self.p_sapling = p_sapling + + def __repr__(self): + my_repr = "{}(size={})".format(self.__class__.__name__, self.size) + return my_repr + + def __str__(self): + return self.__class__.__name__ + + @property + def num_cells(self): + """Number of cells available for growing trees""" + return np.prod(self.size) + + @property + def tree_fraction(self): + """ + Fraction of trees + """ + num_trees = self.trees.sum() + return float(num_trees) / self.num_cells + + def _rand_bool(self, p): + """ + Random boolean distributed according to p, less than p will be True + """ + return np.random.uniform(size=self.trees.shape) < p + + def grow_trees(self): + """ + Growing trees. 
+ """ + growth_sites = self._rand_bool(self.p_sapling) + self.trees[growth_sites] = True + + def advance_one_step(self): + """ + Advance one step + """ + self.grow_trees() + +class BurnableForest(Forest): + """ + Burnable forest support fires + """ + def __init__(self, p_lightning=5.0e-6, **kwargs): + super(BurnableForest, self).__init__(**kwargs) + self.p_lightning = p_lightning + self.fires = np.zeros((self.size), dtype=bool) + + def advance_one_step(self): + """ + Advance one step + """ + super(BurnableForest, self).advance_one_step() + self.start_fires() + self.burn_trees() + + @property + def fire_fraction(self): + """ + Fraction of fires + """ + num_fires = self.fires.sum() + return float(num_fires) / self.num_cells + + def start_fires(self): + """ + Start of fire. + """ + lightning_strikes = (self._rand_bool(self.p_lightning) & + self.trees) + self.fires[lightning_strikes] = True + + def burn_trees(self): + pass + +class SlowBurnForest(BurnableForest): + def burn_trees(self): + """ + Burn trees. 
+ """ + fires = np.zeros((self.size[0] + 2, self.size[1] + 2), dtype=bool) + fires[1:-1, 1:-1] = self.fires + north = fires[:-2, 1:-1] + south = fires[2:, 1:-1] + east = fires[1:-1, :-2] + west = fires[1:-1, 2:] + new_fires = (north | south | east | west) & self.trees + self.trees[self.fires] = False + self.fires = new_fires + +class InstantBurnForest(BurnableForest): + def burn_trees(self): + # 起火点 + strikes = self.fires + # 找到连通区域 + groves, num_groves = label(self.trees) + fires = set(groves[strikes]) + self.fires.fill(False) + # 将与着火点相连的区域都烧掉 + for fire in fires: + self.fires[groves == fire] = True + self.trees[self.fires] = False + self.fires.fill(False) +``` + +测试: + + +```python +forest = Forest() +sb_forest = SlowBurnForest() +ib_forest = InstantBurnForest() + +forests = [forest, sb_forest, ib_forest] + +tree_history = [] + +for i in xrange(1500): + for fst in forests: + fst.advance_one_step() + tree_history.append(tuple(fst.tree_fraction for fst in forests)) +``` + +显示结果: + + +```python +import matplotlib.pyplot as plt +%matplotlib inline + +plt.figure(figsize=(10,6)) + +plt.plot(tree_history) +plt.legend([f.__str__() for f in forests]) + +plt.show() +``` + + +![png](output_6_0.png) + diff --git a/docs/08-object-oriented-programming/08.11-interfaces.md b/docs/08-object-oriented-programming/08.11-interfaces.md new file mode 100644 index 00000000..9b8deabf --- /dev/null +++ b/docs/08-object-oriented-programming/08.11-interfaces.md @@ -0,0 +1,222 @@ + +# 接口 + +在 `Python` 中,鸭子类型(`duck typing`)是一种动态类型的风格。所谓鸭子类型,来自于 `James Whitcomb Riley` 的“鸭子测试”: + +> 当看到一只鸟走起来像鸭子、游泳起来像鸭子、叫起来也像鸭子,那么这只鸟就可以被称为鸭子。 + +假设我们需要定义一个函数,这个函数使用一个类型为鸭子的参数,并调用它的走和叫方法。 + +在鸭子类型的语言中,这样的函数可以接受任何类型的对象,只要这个对象实现了走和叫的方法,否则就引发一个运行时错误。换句话说,任何拥有走和叫方法的参数都是合法的。 + +先看一个例子,父类: + + +```python +class Leaf(object): + def __init__(self, color="green"): + self.color = color + def fall(self): + print "Splat!" 
+``` + +子类: + + +```python +class MapleLeaf(Leaf): + def fall(self): + self.color = 'brown' + super(MapleLeaf, self).fall() +``` + +新的类: + + +```python +class Acorn(object): + def fall(self): + print "Plunk!" +``` + +这三个类都实现了 `fall()` 方法,因此可以这样使用: + + +```python +objects = [Leaf(), MapleLeaf(), Acorn()] + +for obj in objects: + obj.fall() +``` + + Splat! + Splat! + Plunk! + + +这里 `fall()` 方法就是一种鸭子类型的体现。 + +不仅方法可以用鸭子类型,属性也可以: + + +```python +import numpy as np +from scipy.ndimage.measurements import label + +class Forest(object): + """ Forest can grow trees which eventually die.""" + def __init__(self, size=(150,150), p_sapling=0.0025): + self.size = size + self.trees = np.zeros(self.size, dtype=bool) + self.p_sapling = p_sapling + + def __repr__(self): + my_repr = "{}(size={})".format(self.__class__.__name__, self.size) + return my_repr + + def __str__(self): + return self.__class__.__name__ + + @property + def num_cells(self): + """Number of cells available for growing trees""" + return np.prod(self.size) + + @property + def losses(self): + return np.zeros(self.size) + + @property + def tree_fraction(self): + """ + Fraction of trees + """ + num_trees = self.trees.sum() + return float(num_trees) / self.num_cells + + def _rand_bool(self, p): + """ + Random boolean distributed according to p, less than p will be True + """ + return np.random.uniform(size=self.trees.shape) < p + + def grow_trees(self): + """ + Growing trees. 
+ """ + growth_sites = self._rand_bool(self.p_sapling) + self.trees[growth_sites] = True + + def advance_one_step(self): + """ + Advance one step + """ + self.grow_trees() + +class BurnableForest(Forest): + """ + Burnable forest support fires + """ + def __init__(self, p_lightning=5.0e-6, **kwargs): + super(BurnableForest, self).__init__(**kwargs) + self.p_lightning = p_lightning + self.fires = np.zeros((self.size), dtype=bool) + + def advance_one_step(self): + """ + Advance one step + """ + super(BurnableForest, self).advance_one_step() + self.start_fires() + self.burn_trees() + + @property + def losses(self): + return self.fires + + @property + def fire_fraction(self): + """ + Fraction of fires + """ + num_fires = self.fires.sum() + return float(num_fires) / self.num_cells + + def start_fires(self): + """ + Start of fire. + """ + lightning_strikes = (self._rand_bool(self.p_lightning) & + self.trees) + self.fires[lightning_strikes] = True + + def burn_trees(self): + pass + +class SlowBurnForest(BurnableForest): + def burn_trees(self): + """ + Burn trees. 
+ """ + fires = np.zeros((self.size[0] + 2, self.size[1] + 2), dtype=bool) + fires[1:-1, 1:-1] = self.fires + north = fires[:-2, 1:-1] + south = fires[2:, 1:-1] + east = fires[1:-1, :-2] + west = fires[1:-1, 2:] + new_fires = (north | south | east | west) & self.trees + self.trees[self.fires] = False + self.fires = new_fires + +class InstantBurnForest(BurnableForest): + def burn_trees(self): + # 起火点 + strikes = self.fires + # 找到连通区域 + groves, num_groves = label(self.trees) + fires = set(groves[strikes]) + self.fires.fill(False) + # 将与着火点相连的区域都烧掉 + for fire in fires: + self.fires[groves == fire] = True + self.trees[self.fires] = False + self.fires.fill(False) +``` + +测试: + + +```python +forest = Forest() +b_forest = BurnableForest() +sb_forest = SlowBurnForest() +ib_forest = InstantBurnForest() + +forests = [forest, b_forest, sb_forest, ib_forest] + +losses_history = [] + +for i in xrange(1500): + for fst in forests: + fst.advance_one_step() + losses_history.append(tuple(fst.losses.sum() for fst in forests)) +``` + +显示结果: + + +```python +import matplotlib.pyplot as plt +%matplotlib inline + +plt.figure(figsize=(10,6)) + +plt.plot(losses_history) +plt.legend([f.__str__() for f in forests]) + +plt.show() +``` + + +![png](output_15_0.png) + diff --git a/docs/08-object-oriented-programming/08.12-public-private-special-in-python.md b/docs/08-object-oriented-programming/08.12-public-private-special-in-python.md new file mode 100644 index 00000000..f7d74ab3 --- /dev/null +++ b/docs/08-object-oriented-programming/08.12-public-private-special-in-python.md @@ -0,0 +1,54 @@ + +# 公有,私有和特殊方法和属性 + +- 我们之前已经见过 `special` 方法和属性,即以 `__` 开头和结尾的方法和属性 +- 私有方法和属性,以 `_` 开头,不过不是真正私有,而是可以调用的,但是不会被代码自动完成所记录(即 Tab 键之后不会显示) +- 其他都是公有的方法和属性 +- 以 `__` 开头但不以 `__` 结尾的方法和属性更加特殊,调用方式也不同: + + +```python +class MyClass(object): + def __init__(self): + print "I'm special!" + def _private(self): + print "I'm private!" + def public(self): + print "I'm public!" 
+ def __really_special(self): + print "I'm really special!" +``` + + +```python +m = MyClass() +``` + + I'm special! + + + +```python +m.public() +``` + + I'm public! + + + +```python +m._private() +``` + + I'm private! + + +注意调用方式: + + +```python +m._MyClass__really_special() +``` + + I'm really special! + diff --git a/docs/08-object-oriented-programming/08.13-multiple-inheritance.md b/docs/08-object-oriented-programming/08.13-multiple-inheritance.md new file mode 100644 index 00000000..15fae3ac --- /dev/null +++ b/docs/08-object-oriented-programming/08.13-multiple-inheritance.md @@ -0,0 +1,155 @@ + +# 多重继承 + +多重继承,指的是一个类别可以同时从多于一个父类继承行为与特征的功能,`Python` 是支持多重继承的: + + +```python +class Leaf(object): + def __init__(self, color='green'): + self.color = color + +class ColorChangingLeaf(Leaf): + def change(self, new_color='brown'): + self.color = new_color + +class DeciduousLeaf(Leaf): + def fall(self): + print "Plunk!" + +class MapleLeaf(ColorChangingLeaf, DeciduousLeaf): + pass +``` + +在上面的例子中, `MapleLeaf` 就使用了多重继承,它可以使用两个父类的方法: + + +```python +leaf = MapleLeaf() + +leaf.change("yellow") +print leaf.color + +leaf.fall() +``` + + yellow + Plunk! + + +如果同时实现了不同的接口,那么,最后使用的方法以继承的顺序为准,放在前面的优先继承: + + +```python +class Leaf(object): + def __init__(self, color='green'): + self.color = color + +class ColorChangingLeaf(Leaf): + def change(self, new_color='brown'): + self.color = new_color + def fall(self): + print "Spalt!" + +class DeciduousLeaf(Leaf): + def fall(self): + print "Plunk!" + +class MapleLeaf(ColorChangingLeaf, DeciduousLeaf): + pass +``` + + +```python +leaf = MapleLeaf() +leaf.fall() +``` + + Spalt! + + + +```python +class MapleLeaf(DeciduousLeaf, ColorChangingLeaf): + pass +``` + + +```python +leaf = MapleLeaf() +leaf.fall() +``` + + Plunk! 
+ + +事实上,这个顺序可以通过该类的 `__mro__` 属性或者 `mro()` 方法来查看: + + +```python +MapleLeaf.__mro__ +``` + + + + + (__main__.MapleLeaf, + __main__.DeciduousLeaf, + __main__.ColorChangingLeaf, + __main__.Leaf, + object) + + + + +```python +MapleLeaf.mro() +``` + + + + + [__main__.MapleLeaf, + __main__.DeciduousLeaf, + __main__.ColorChangingLeaf, + __main__.Leaf, + object] + + + +考虑更复杂的例子: + + +```python +class A(object): + pass + +class B(A): + pass + +class C(A): + pass + +class C1(C): + pass + +class B1(B): + pass + +class D(B1, C): + pass +``` + +调用顺序: + + +```python +D.mro() +``` + + + + + [__main__.D, __main__.B1, __main__.B, __main__.C, __main__.A, object] + + diff --git a/docs/09-theano/09.01-introduction-and-installation.md b/docs/09-theano/09.01-introduction-and-installation.md new file mode 100644 index 00000000..de4ec745 --- /dev/null +++ b/docs/09-theano/09.01-introduction-and-installation.md @@ -0,0 +1,97 @@ + +# Theano 简介及其安装 + +# 简介 + +`Theano` 是一个 `Python` 科学计算库,允许我们进行符号运算,并在 `CPU` 和 `GPU` 上执行。 + +它最初由 `Montreal` 大学的机器学习研究者们所开发,用来进行机器学习的计算。 + +按照[官网](http://deeplearning.net/software/theano/)上的说明,它拥有以下几个方面的特点: + +- 与 **Numpy, Scipy** 的紧密结合 +- **GPU** 加速 +- 高效的符号计算 +- 速度和稳定性 +- 动态生成 **C** 代码 + +## 使用 anaconda 安装 theano + +`windows` 下,使用 `anaconda` 安装 `theano` 的命令为: + + conda install mingw libpython + pip install theano + +`linux` 下,使用 `anaconda` 安装的命令为 + + conda install theano + +安装好之后,还需要安装 `Cuda` 并进行 `GPU` 环境的配置,否则是不能利用 `GPU` 进行计算的,推荐使用 `linux/mac` 进行配置,具体方法可以参考[官网](http://deeplearning.net/software/theano/)上的配置说明。 + +查看安装的版本: + + +```python +import theano + +theano.__version__ +``` + + + + + '0.7.0.dev-54186290a97186b9c6b76317e007844529a352f4' + + + +查看当前使用的 device: + + +```python +theano.config.device +``` + + + + + 'cpu' + + + +运行测试: + + +```python +theano.test() +``` + + /usr/local/lib/python2.7/dist-packages/theano/misc/pycuda_init.py:34: UserWarning: PyCUDA import failed in theano.misc.pycuda_init + warnings.warn("PyCUDA import failed in 
theano.misc.pycuda_init") + ....................S............... + + Theano version 0.7.0.dev-54186290a97186b9c6b76317e007844529a352f4 + theano is installed in /usr/local/lib/python2.7/dist-packages/theano + NumPy version 1.10.1 + NumPy relaxed strides checking option: True + NumPy is installed in /usr/lib/python2.7/dist-packages/numpy + Python version 2.7.6 (default, Jun 22 2015, 17:58:13) [GCC 4.8.2] + nose version 1.3.7 + + + + ---------------------------------------------------------------------- + Ran 37 tests in 37.919s + + OK (SKIP=1) + + + + + + + + + +这里我已经在本地 `Windows` 配好了 `GPU` 的设置,如果没有配好,显示的结果可能不一样。 + +`Windows` 下第一次运行可能会显示 `DEBUG: nvcc STDOUT` 等内容,**`Just ignore it!`** diff --git a/docs/09-theano/09.02-theano-basics.md b/docs/09-theano/09.02-theano-basics.md new file mode 100644 index 00000000..deea9cf8 --- /dev/null +++ b/docs/09-theano/09.02-theano-basics.md @@ -0,0 +1,394 @@ + +# Theano 基础 + + +```python +%matplotlib inline +import numpy as np +import matplotlib.pyplot as plt +``` + +首先导入 `theano` 及其 `tensor` 子模块(`tensor`,张量): + + +```python +import theano + +# 一般都把 `tensor` 子模块导入并命名为 T +import theano.tensor as T +``` + + Using gpu device 1: Tesla K10.G2.8GB (CNMeM is disabled) + + +`tensor` 模块包含很多我们常用的数学操作,所以为了方便,将其命名为 T。 + +## 符号计算 + +`theano` 中,所有的算法都是用符号计算的,所以某种程度上,用 `theano` 写算法更像是写数学(之前在[04.06 积分](../04. 
scipy/04.06 integration in python.ipynb)一节中接触过用 `sympy` 定义的符号变量)。 + +用 `T.scalar` 来定义一个符号标量: + + +```python +foo = T.scalar('x') +``` + + +```python +print foo +``` + + x + + +支持符号计算: + + +```python +bar = foo ** 2 + +print bar +``` + + Elemwise{pow,no_inplace}.0 + + +这里定义 `foo` 是 $x$,`bar` 就是变量 $x^2$,但显示出来的却是看不懂的东西。 + +为了更好的显示 `bar`,我们使用 `theano.pp()` 函数(`pretty print`)来显示: + + +```python +print theano.pp(bar) +``` + + (x ** TensorConstant{2}) + + +查看类型: + + +```python +print type(foo) +print foo.type +``` + + + TensorType(float32, scalar) + + +## theano 函数 + +有了符号变量,自然可以用符号变量来定义函数,`theano.function()` 函数用来生成符号函数: + + theano.function(input, output) + +其中 `input` 对应的是作为参数的符号变量组成的列表,`output` 对应的是输出,输出可以是一个,也可以是多个符号变量组成的列表。 + +例如,我们用刚才生成的 `foo` 和 `bar` 来定义函数: + + +```python +square = theano.function([foo], bar) +``` + +使用 `square` 函数: + + +```python +print square(3) +``` + + 9.0 + + +也可以使用 `bar` 的 `eval` 方法,将 `x` 替换为想要的值,`eval` 接受一个字典作为参数,键值对表示符号变量及其对应的值: + + +```python +print bar.eval({foo: 3}) +``` + + 9.0 + + +## theano.tensor + +除了 `T.scalar()` 标量之外,`Theano` 中还有很多符号变量类型,这些都包含在 `tensor`(张量)子模块中,而且 `tensor` 中也有很多函数对它们进行操作。 + +- `T.scalar(name=None, dtype=config.floatX)` + - 标量,shape - () +- `T.vector(name=None, dtype=config.floatX)` + - 向量,shape - (?,) +- `T.matrix(name=None, dtype=config.floatX)` + - 矩阵,shape - (?,?) +- `T.row(name=None, dtype=config.floatX)` + - 行向量,shape - (1,?) +- `T.col(name=None, dtype=config.floatX)` + - 列向量,shape - (?,1) +- `T.tensor3(name=None, dtype=config.floatX)` + - 3 维张量,shape - (?,?,?) +- `T.tensor4(name=None, dtype=config.floatX)` + - 4 维张量,shape - (?,?,?,?) 
+ +`shape` 中为 1 的维度支持 `broadcast` 机制。 + +除了直接指定符号变量的类型(默认 `floatX`),还可以直接在每类前面加上一个字母来定义不同的类型: + +- `b` int8 +- `w` int16 +- `i` int32 +- `l` int64 +- `d` float64 +- `f` float32 +- `c` complex64 +- `z` complex128 + +例如 `T.dvector()` 表示的就是一个 `float64` 型的向量。 + +除此之外,还可以用它们的复数形式一次定义多个符号变量: + + x,y,z = T.vectors('x','y','z') + x,y,z = T.vectors(3) + + +```python +A = T.matrix('A') +x = T.vector('x') +b = T.vector('b') +``` + +`T.dot()` 表示矩阵乘法: +$$y = Ax+b$$ + + +```python +y = T.dot(A, x) + b +``` + +`T.sum()` 表示进行求和: +$$z = \sum_{i,j} A_{ij}^2$$ + + +```python +z = T.sum(A**2) +``` + +来定义一个线性函数,以 $A,x,b$ 为参数,以 $y,z$ 为输出: + + +```python +linear_mix = theano.function([A, x, b], + [y, z]) +``` + +使用这个函数: + +$$ +A = \begin{bmatrix} +1 & 2 & 3 \\ +4 & 5 & 6 +\end{bmatrix}, +x = \begin{bmatrix} +1 \\ 2 \\ 3 +\end{bmatrix}, +b = \begin{bmatrix} +4 \\ 5 +\end{bmatrix} +$$ + + +```python +print linear_mix(np.array([[1, 2, 3], + [4, 5, 6]], dtype=theano.config.floatX), #A + np.array([1, 2, 3], dtype=theano.config.floatX), #x + np.array([4, 5], dtype=theano.config.floatX)) #b +``` + + [array([ 18., 37.], dtype=float32), array(91.0, dtype=float32)] + + +这里 `dtype=theano.config.floatX` 是为了与 `theano` 设置的浮点数精度保持一致,默认是 `float64`,但是在 `GPU` 上一般使用 `float32` 会更高效一些。 + +我们还可以像定义普通函数一样,给 `theano` 函数提供默认值,需要使用 `theano.Param` 类: + + +```python +linear_mix_default = theano.function([A, x, theano.Param(b, default=np.zeros(2, dtype=theano.config.floatX))], + [y, z]) +``` + +计算默认参数下的结果: + + +```python +print linear_mix_default(np.array([[1, 2, 3], + [4, 5, 6]], dtype=theano.config.floatX), #A + np.array([1, 2, 3], dtype=theano.config.floatX)) #x +``` + + [array([ 14., 32.], dtype=float32), array(91.0, dtype=float32)] + + +计算刚才的结果: + + +```python +print linear_mix_default(np.array([[1, 2, 3], + [4, 5, 6]], dtype=theano.config.floatX), #A + np.array([1, 2, 3], dtype=theano.config.floatX), #x + np.array([4, 5], dtype=theano.config.floatX)) #b +``` + + [array([ 18., 37.], dtype=float32), array(91.0, 
dtype=float32)] + + +## 共享的变量 + +`Theano` 中可以定义共享的变量,它们可以在多个函数中被共享,共享变量类似于普通函数定义时候使用的全局变量,同时加上了 `global` 的属性以便在函数中修改这个全局变量的值。 + + +```python +shared_var = theano.shared(np.array([[1.0, 2.0], [3.0, 4.0]], dtype=theano.config.floatX)) + +print shared_var.type +``` + + CudaNdarrayType(float32, matrix) + + +可以通过 `set_value` 方法改变它的值: + + +```python +shared_var.set_value(np.array([[3.0, 4], [2, 1]], dtype=theano.config.floatX)) +``` + +通过 `get_value()` 方法返回它的值: + + +```python +print shared_var.get_value() +``` + + [[ 3. 4.] + [ 2. 1.]] + + +共享变量进行运算: + + +```python +shared_square = shared_var ** 2 + +f = theano.function([], shared_square) + +print f() +``` + + [[ 9. 16.] + [ 4. 1.]] + + +这里函数不需要参数,因为共享变量隐式地被认为是一个参数。 + +得到的结果会随这个共享变量的变化而变化: + + +```python +shared_var.set_value(np.array([[1.0, 2], [3, 4]], dtype=theano.config.floatX)) + +print f() +``` + + [[ 1. 4.] + [ 9. 16.]] + + +一个共享变量的值可以用 `updates` 关键词在 `theano` 函数中被更新: + + +```python +subtract = T.matrix('subtract') + +f_update = theano.function([subtract], shared_var, updates={shared_var: shared_var - subtract}) +``` + +这个函数先返回当前的值,然后将当前值更新为原来的值减去参数: + + +```python +print 'before update:' +print shared_var.get_value() + +print 'the return value:' +print f_update(np.array([[1.0, 1], [1, 1]], dtype=theano.config.floatX)) + +print 'after update:' +print shared_var.get_value() +``` + + before update: + [[ 1. 2.] + [ 3. 4.]] + the return value: + + after update: + [[ 0. 1.] + [ 2. 
3.]] + + +## 导数 + +`Theano` 的一大好处在于它对符号变量计算导数的能力。 + +我们用 `T.grad()` 来计算导数,之前我们定义了 `foo` 和 `bar` (分别是 $x$ 和 $x^2$),我们来计算 `bar` 关于 `foo` 的导数(应该是 $2x$): + + +```python +bar_grad = T.grad(bar, foo) # 表示 bar (x^2) 关于 foo (x) 的导数 + +print bar_grad.eval({foo: 10}) +``` + + 20.0 + + +再如,对之前的 $y = Ax + b$ 求 $y$ 关于 $x$ 的雅可比矩阵(应当是 $A$): + + +```python +y_J = theano.gradient.jacobian(y, x) + +print y_J.eval({A: np.array([[9.0, 8, 7], [4, 5, 6]], dtype=theano.config.floatX), #A + x: np.array([1.0, 2, 3], dtype=theano.config.floatX), #x + b: np.array([4.0, 5], dtype=theano.config.floatX)}) #b +``` + + [[ 9. 8. 7.] + [ 4. 5. 6.]] + + +`theano.gradient.jacobian` 用来计算雅可比矩阵,而 `theano.gradient.hessian` 可以用来计算 `Hessian` 矩阵。 + +## `R-op` 和 `L-op` + +Rop 用来计算 $\frac{\partial f}{\partial x}v$,Lop 用来计算 $v\frac{\partial f}{\partial x}$: + +一个是雅可比矩阵与列向量的乘积,另一个是行向量与雅可比矩阵的乘积。 + + +```python +W = T.dmatrix('W') +V = T.dmatrix('V') +x = T.dvector('x') +y = T.dot(x, W) +JV = T.Rop(y, W, V) +f = theano.function([W, V, x], JV) + +print f([[1, 1], [1, 1]], [[2, 2], [2, 2]], [0,1]) +``` + + [ 2. 2.] 
+ diff --git a/docs/09-theano/09.03-gpu-on-windows.md b/docs/09-theano/09.03-gpu-on-windows.md new file mode 100644 index 00000000..a5dbd793 --- /dev/null +++ b/docs/09-theano/09.03-gpu-on-windows.md @@ -0,0 +1,239 @@ + +# Theano 在 Windows 上的配置 + +注意:不建议在 `windows` 进行 `theano` 的配置。 + +务必确认你的显卡支持 `CUDA`。 + +我个人的电脑搭载的是 `Windows 10 x64` 系统,显卡是 `Nvidia GeForce GTX 850M`。 + +## 安装 theano + +首先是用 `anaconda` 安装 `theano`: + + conda install mingw libpython + pip install theano + +## 安装 VS 和 CUDA + +按顺序安装这两个软件: +- 安装 Visual Studio 2010/2012/2013 +- 安装 对应的 x64 或 x86 CUDA + +Cuda 的版本需要与电脑的显卡兼容。 + +我安装的是 Visual Studio 2012 和 CUDA v7.0。 + +## 配置环境变量 + +`CUDA` 会自动帮你添加一个 `CUDA_PATH` 环境变量(环境变量在 控制面板->系统与安全->系统->高级系统设置 中),表示你的 `CUDA` 安装位置,我的电脑上为: + +- `CUDA_PATH` + - `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0` + +我们配置两个相关变量: + +- `CUDA_BIN_PATH` + - `%CUDA_PATH%\bin` +- `CUDA_LIB_PATH` + - `%CUDA_PATH%\lib\Win32` + +接下来在 `Path` 环境变量的后面加上: + +- `Miniconda` 中关于 `mingw` 的项: + - `C:\Miniconda\MinGW\bin;` + - `C:\Miniconda\MinGW\x86_64-w64-mingw32\lib;` + +- `VS` 中的 `cl` 编译命令: + - `C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin;` + - `C:\Program Files (x86)\Microsoft Visual Studio 11.0\Common7\IDE;` + +生成测试文件: + + +```python +%%file test_theano.py +from theano import config +print 'using device:', config.device +``` + + Writing test_theano.py + + +我们可以通过临时设置环境变量 `THEANO_FLAGS` 来改变 `theano` 的运行模式,在 linux 下,临时环境变量直接用: + + THEANO_FLAGS=xxx + +就可以完成,设置完成之后,该环境变量只在当前的命令窗口有效,你可以这样运行你的代码: + + THEANO_FLAGS=xxx python your_script.py + +在 `Windows` 下,需要使用 `set` 命令来临时设置环境变量,所以运行方式为: + + set THEANO_FLAGS=xxx && python your_script.py + + +```python +import sys + +if sys.platform == 'win32': + !set THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 && python test_theano.py +else: + !THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 python test_theano.py +``` + + using device: cpu + + + +```python +if sys.platform == 'win32': + !set 
THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 && python 
test_theano.py +else: + !THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python test_theano.py +``` + + Using gpu device 0: Tesla C2075 (CNMeM is disabled) + using device: gpu + + +测试 `CPU` 和 `GPU` 的差异: + + +```python +%%file test_theano.py + +from theano import function, config, shared, sandbox +import theano.tensor as T +import numpy +import time + +vlen = 10 * 30 * 768 # 10 x #cores x # threads per core +iters = 1000 + +rng = numpy.random.RandomState(22) +x = shared(numpy.asarray(rng.rand(vlen), config.floatX)) +f = function([], T.exp(x)) + +t0 = time.time() +for i in xrange(iters): + r = f() +t1 = time.time() +print("Looping %d times took %f seconds" % (iters, t1 - t0)) +print("Result is %s" % (r,)) +if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]): + print('Used the cpu') +else: + print('Used the gpu') +``` + + Overwriting test_theano.py + + + +```python +if sys.platform == 'win32': + !set THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 && python test_theano.py +else: + !THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 python test_theano.py +``` + + Looping 1000 times took 3.498123 seconds + Result is [ 1.23178029 1.61879337 1.52278066 ..., 2.20771813 2.29967761 + 1.62323284] + Used the cpu + + + +```python +if sys.platform == 'win32': + !set THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 && python test_theano.py +else: + !THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python test_theano.py +``` + + Using gpu device 0: Tesla C2075 (CNMeM is disabled) + Looping 1000 times took 0.847006 seconds + Result is [ 1.23178029 1.61879349 1.52278066 ..., 2.20771813 2.29967761 + 1.62323296] + Used the gpu + + +可以看到 `GPU` 明显要比 `CPU` 快。 + +使用 `GPU` 模式的 `T.exp(x)` 可以获得更快的加速效果: + + +```python +%%file test_theano.py + +from theano import function, config, shared, sandbox +import theano.sandbox.cuda.basic_ops +import theano.tensor as T +import numpy +import time + +vlen = 10 * 30 * 768 # 10 x #cores x # threads per core 
+iters = 1000 + +rng = numpy.random.RandomState(22) +x = shared(numpy.asarray(rng.rand(vlen), 'float32')) +f = function([], sandbox.cuda.basic_ops.gpu_from_host(T.exp(x))) + +t0 = time.time() +for i in xrange(iters): + r = f() +t1 = time.time() +print("Looping %d times took %f seconds" % (iters, t1 - t0)) +print("Result is %s" % (r,)) +print("Numpy result is %s" % (numpy.asarray(r),)) +if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]): + print('Used the cpu') +else: + print('Used the gpu') +``` + + Overwriting test_theano.py + + + +```python +if sys.platform == 'win32': + !set THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 && python test_theano.py +else: + !THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python test_theano.py +``` + + Using gpu device 0: Tesla C2075 (CNMeM is disabled) + Looping 1000 times took 0.318359 seconds + Result is + Numpy result is [ 1.23178029 1.61879349 1.52278066 ..., 2.20771813 2.29967761 + 1.62323296] + Used the gpu + + + +```python +!rm test_theano.py +``` + +## 配置 .theanorc.txt + +我们可以在个人文件夹下配置 .theanorc.txt 文件来省去每次都使用环境变量设置的麻烦: + +例如我现在的 .theanorc.txt 配置为: +``` +[global] +device = gpu +floatX = float32 + +[nvcc] +fastmath = True +flags = -LC:\Miniconda\libs +compiler_bindir=C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin + +[gcc] +cxxflags = -LC:\Miniconda\MinGW +``` + +具体这些配置有什么作用之后可以查看官网上的教程。 diff --git a/docs/09-theano/09.04-graph-structures.md b/docs/09-theano/09.04-graph-structures.md new file mode 100644 index 00000000..18f1bedf --- /dev/null +++ b/docs/09-theano/09.04-graph-structures.md @@ -0,0 +1,213 @@ + +# Theano 符号图结构 + +使用 `Theano`,首先要定义符号变量,然后是利用这些符号变量进行计算,这些符号被称为 **`variables`**,而操作 `+, -, **, sum(), tanh()` 被称为 **`ops`**,一个 `op` 操作接受某些类型的输入,并返回某些类型的输出。 + +`Theano` 利用这些来构建一个图结构,一个图结构包括: +- **`variable`** 节点 +- **`op`** 节点 +- **`apply`** 节点 + +其中,`apply` 节点用来表示一个特定的 `op` 作用在一些特定的 `variables` 上,例如: + + +```python +import theano +import theano.tensor as T + +x = 
T.dmatrix('x') +y = T.dmatrix('y') +z = x + y +``` + + Using gpu device 0: GeForce GTX 850M + + +要显示这个图结构可以用 `pydotprint`,先安装 [graphviz](http://www.graphviz.org)。 + +`Windows` 下: + +在环境变量 path 后加上: + +- path + - C:\Program Files (x86)\Graphviz2.38\bin + +然后要先安装 `pydot` 包: + +如果你的 `pyparsing >= 2.0` ,则将其降为 `1.5.7`,下载并安装 `pydot-1.0.28`。 + +安装完之后,找到 `pydot.py` 将其中: + + graph.append( '%s %s {\n' % (self.obj_dict['type'], self.obj_dict['name']) ) + +修改为: + + graph.append( '%s %s {\n' % (self.obj_dict['type'], quote_if_necessary(self.obj_dict['name'])) ) + + +```python +theano.printing.pydotprint(z, outfile='apply1.png', var_with_name_simple=True) +``` + + The output file is available at apply1.png + + +它的图结构如下: + +![图结构1](apply1.png) + +`z` 的 `owner` 是一个 `apply` 结构,其 `op` 为: + + +```python +z.owner.op.name +``` + + + + + 'Elemwise{add,no_inplace}' + + + +这个 `apply` 结构的输入值有两个,输出值有一个: + + +```python +print z.owner.nin +print z.owner.nout +``` + + 2 + 1 + + +查看它的输入: + + +```python +z.owner.inputs +``` + + + + + [x, y] + + + +我们可以用 pprint 来显示它: + + +```python +print theano.printing.pprint(z) +``` + + (x + y) + + +用 `debugprint` 显示图结构: + + +```python +theano.printing.debugprint(z) +``` + + Elemwise{add,no_inplace} [@A] '' + |x [@B] + |y [@C] + + +再看另一个稍微复杂的例子: + + +```python +y = x * 2 +``` + +查看 `y` 的图谱: + + +```python +theano.printing.debugprint(y) +``` + + Elemwise{mul,no_inplace} [@A] '' + |x [@B] + |DimShuffle{x,x} [@C] '' + |TensorConstant{2} [@D] + + +这里我们看到,`y` 对应的第二个 `input` 并不是 `2`,而是一个 `DimShuffle` 的操作: + + +```python +y.owner.inputs[1].owner.op +``` + + + + + + + + +它的输入才是常数 2: + + +```python +y.owner.inputs[1].owner.inputs +``` + + + + + [TensorConstant{2}] + + + + +```python +theano.printing.pydotprint(y, outfile='apply2.png', var_with_name_simple=True) +``` + + The output file is available at apply2.png + + +其图结构为 +![结构2](apply2.png) + +## function 对图的优化 + + +```python +a = T.dscalar('a') +b = a + a ** 10 + +f = theano.function([a], b) +``` + + +```python 
+theano.printing.pydotprint(b, outfile='apply_no_opti.png', var_with_name_simple=True) +theano.printing.pydotprint(f, outfile='apply_opti.png', var_with_name_simple=True) +``` + + The output file is available at apply_no_opti.png + The output file is available at apply_opti.png + + +比较一下 `function` 函数对图结构进行的优化: + +未优化前: + +![没有优化](apply_no_opti.png) + +优化后: + +![优化](apply_opti.png) + +## 图结构的作用 + +- 计算按照图结构来计算 +- 优化,求导 diff --git a/docs/09-theano/09.05-configuration-settings-and-compiling-modes.md b/docs/09-theano/09.05-configuration-settings-and-compiling-modes.md new file mode 100644 index 00000000..5dd01a95 --- /dev/null +++ b/docs/09-theano/09.05-configuration-settings-and-compiling-modes.md @@ -0,0 +1,735 @@ + +# Theano 配置和编译模式 + +## 配置 + +之前我们已经知道, `theano` 的配置可以用 `config` 模块来查看: + + +```python +import theano +import theano.tensor as T + +print theano.config +``` + + floatX (('float64', 'float32', 'float16')) + Doc: Default floating-point precision for python casts. + + Note: float16 support is experimental, use at your own risk. + Value: float32 + + warn_float64 (('ignore', 'warn', 'raise', 'pdb')) + Doc: Do an action when a tensor variable with float64 dtype is created. They can't be run on the GPU with the current(old) gpu back-end and are slow with gamer GPUs. + Value: ignore + + cast_policy (('custom', 'numpy+floatX')) + Doc: Rules for implicit type casting + Value: custom + + int_division (('int', 'raise', 'floatX')) + Doc: What to do when one computes x / y, where both x and y are of integer types + Value: int + + device (cpu, gpu*, opencl*, cuda*) + Doc: Default device for computations. If gpu*, change the default to try to move computation to it and to put shared variable of float32 on it. Do not use upper case letters, only lower case even if NVIDIA use capital letters. + Value: gpu1 + + init_gpu_device (, gpu*, opencl*, cuda*) + Doc: Initialize the gpu device to use, works only if device=cpu. 
Unlike 'device', setting this option will NOT move computations, nor shared variables, to the specified GPU. It can be used to run GPU-specific tests on a particular GPU. + Value: + + force_device () + Doc: Raise an error if we can't use the specified device + Value: False + + + Doc: + Context map for multi-gpu operation. Format is a + semicolon-separated list of names and device names in the + 'name->dev_name' format. An example that would map name 'test' to + device 'cuda0' and name 'test2' to device 'opencl0:0' follows: + "test->cuda0;test2->opencl0:0". + + Invalid context names are 'cpu', 'cuda*' and 'opencl*' + + Value: + + print_active_device () + Doc: Print active device at when the GPU device is initialized. + Value: True + + enable_initial_driver_test () + Doc: Tests the nvidia driver when a GPU device is initialized. + Value: True + + cuda.root () + Doc: directory with bin/, lib/, include/ for cuda utilities. + This directory is included via -L and -rpath when linking + dynamically compiled modules. If AUTO and nvcc is in the + path, it will use one of nvcc parent directory. Otherwise + /usr/local/cuda will be used. Leave empty to prevent extra + linker directives. Default: environment variable "CUDA_ROOT" + or else "AUTO". + + Value: /usr/local/cuda-7.0 + + + Doc: Extra compiler flags for nvcc + Value: + + nvcc.compiler_bindir () + Doc: If defined, nvcc compiler driver will seek g++ and gcc in this directory + Value: + + nvcc.fastmath () + Doc: + Value: False + + gpuarray.sync () + Doc: If True, every op will make sure its work is done before + returning. Setting this to True will slow down execution, + but give much more accurate results in profiling. + Value: False + + gpuarray.preallocate () + Doc: If 0 it doesn't do anything. If between 0 and 1 it + will preallocate that fraction of the total GPU memory. + If 1 or greater it will preallocate that amount of memory + (in megabytes). 
+ Value: 0.0 + + + Doc: This flag is deprecated; use dnn.conv.algo_fwd. + Value: True + + + Doc: This flag is deprecated; use dnn.conv.algo_bwd. + Value: True + + + Doc: This flag is deprecated; use dnn.conv.algo_bwd_data and dnn.conv.algo_bwd_filter. + Value: True + + dnn.conv.algo_fwd (('small', 'none', 'large', 'fft', 'fft_tiling', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change')) + Doc: Default implementation to use for CuDNN forward convolution. + Value: small + + dnn.conv.algo_bwd_data (('none', 'deterministic', 'fft', 'fft_tiling', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change')) + Doc: Default implementation to use for CuDNN backward convolution to get the gradients of the convolution with regard to the inputs. + Value: none + + dnn.conv.algo_bwd_filter (('none', 'deterministic', 'fft', 'small', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change')) + Doc: Default implementation to use for CuDNN backward convolution to get the gradients of the convolution with regard to the filters. + Value: none + + dnn.conv.precision (('as_input', 'float16', 'float32', 'float64')) + Doc: Default data precision to use for the computation in CuDNN convolutions (defaults to the same dtype as the inputs of the convolutions). + Value: as_input + + dnn.include_path () + Doc: Location of the cudnn header (defaults to the cuda root) + Value: /usr/local/cuda-7.0/include + + dnn.library_path () + Doc: Location of the cudnn header (defaults to the cuda root) + Value: /usr/local/cuda-7.0/lib64 + + assert_no_cpu_op (('ignore', 'warn', 'raise', 'pdb')) + Doc: Raise an error/warning if there is a CPU op in the computational graph. + Value: ignore + + mode (('Mode', 'ProfileMode', 'DebugMode', 'FAST_RUN', 'NanGuardMode', 'FAST_COMPILE', 'PROFILE_MODE', 'DEBUG_MODE')) + Doc: Default compilation mode + Value: Mode + + cxx () + Doc: The C++ compiler to use. 
Currently only g++ is supported, but supporting additional compilers should not be too difficult. If it is empty, no C++ code is compiled. + Value: /usr/bin/g++ + + linker (('cvm', 'c|py', 'py', 'c', 'c|py_nogc', 'vm', 'vm_nogc', 'cvm_nogc')) + Doc: Default linker used if the theano flags mode is Mode or ProfileMode(deprecated) + Value: cvm + + allow_gc () + Doc: Do we default to delete intermediate results during Theano function calls? Doing so lowers the memory requirement, but asks that we reallocate memory at the next function call. This is implemented for the default linker, but may not work for all linkers. + Value: True + + optimizer (('fast_run', 'merge', 'fast_compile', 'None')) + Doc: Default optimizer. If not None, will use this linker with the Mode object (not ProfileMode(deprecated) or DebugMode) + Value: fast_run + + optimizer_verbose () + Doc: If True, we print all optimization being applied + Value: False + + on_opt_error (('warn', 'raise', 'pdb', 'ignore')) + Doc: What to do when an optimization crashes: warn and skip it, raise the exception, or fall into the pdb debugger. + Value: warn + + + Doc: This config option was removed in 0.5: do not use it! + Value: True + + nocleanup () + Doc: Suppress the deletion of code files that did not compile cleanly + Value: False + + on_unused_input (('raise', 'warn', 'ignore')) + Doc: What to do if a variable in the 'inputs' list of theano.function() is not used in the graph. 
+ Value: raise + + tensor.cmp_sloppy () + Doc: Relax tensor._allclose (0) not at all, (1) a bit, (2) more + Value: 0 + + tensor.local_elemwise_fusion () + Doc: Enable or not in fast_run mode(fast_run optimization) the elemwise fusion optimization + Value: True + + gpu.local_elemwise_fusion () + Doc: Enable or not in fast_run mode(fast_run optimization) the gpu elemwise fusion optimization + Value: True + + lib.amdlibm () + Doc: Use amd's amdlibm numerical library + Value: False + + gpuelemwise.sync () + Doc: when true, wait that the gpu fct finished and check it error code. + Value: True + + traceback.limit () + Doc: The number of stack to trace. -1 mean all. + Value: 8 + + experimental.mrg () + Doc: Another random number generator that work on the gpu + Value: False + + experimental.unpickle_gpu_on_cpu () + Doc: Allow unpickling of pickled CudaNdarrays as numpy.ndarrays.This is useful, if you want to open a CudaNdarray without having cuda installed.If you have cuda installed, this will force unpickling tobe done on the cpu to numpy.ndarray.Please be aware that this may get you access to the data,however, trying to unpicke gpu functions will not succeed.This flag is experimental and may be removed any time, whengpu<>cpu transparency is solved. + Value: False + + numpy.seterr_all (('ignore', 'warn', 'raise', 'call', 'print', 'log', 'None')) + Doc: ("Sets numpy's behaviour for floating-point errors, ", "see numpy.seterr. 'None' means not to change numpy's default, which can be different for different numpy releases. This flag sets the default behaviour for all kinds of floating-point errors, its effect can be overriden for specific errors by the following flags: seterr_divide, seterr_over, seterr_under and seterr_invalid.") + Value: ignore + + numpy.seterr_divide (('None', 'ignore', 'warn', 'raise', 'call', 'print', 'log')) + Doc: Sets numpy's behavior for division by zero, see numpy.seterr. 'None' means using the default, defined by numpy.seterr_all. 
+ Value: None + + numpy.seterr_over (('None', 'ignore', 'warn', 'raise', 'call', 'print', 'log')) + Doc: Sets numpy's behavior for floating-point overflow, see numpy.seterr. 'None' means using the default, defined by numpy.seterr_all. + Value: None + + numpy.seterr_under (('None', 'ignore', 'warn', 'raise', 'call', 'print', 'log')) + Doc: Sets numpy's behavior for floating-point underflow, see numpy.seterr. 'None' means using the default, defined by numpy.seterr_all. + Value: None + + numpy.seterr_invalid (('None', 'ignore', 'warn', 'raise', 'call', 'print', 'log')) + Doc: Sets numpy's behavior for invalid floating-point operation, see numpy.seterr. 'None' means using the default, defined by numpy.seterr_all. + Value: None + + warn.ignore_bug_before (('0.6', 'None', 'all', '0.3', '0.4', '0.4.1', '0.5', '0.7')) + Doc: If 'None', we warn about all Theano bugs found by default. If 'all', we don't warn about Theano bugs found by default. If a version, we print only the warnings relative to Theano bugs found after that version. Warning for specific bugs can be configured with specific [warn] flags. + Value: 0.6 + + warn.argmax_pushdown_bug () + Doc: Warn if in past version of Theano we generated a bug with the theano.tensor.nnet.nnet.local_argmax_pushdown optimization. Was fixed 27 may 2010 + Value: False + + warn.gpusum_01_011_0111_bug () + Doc: Warn if we are in a case where old version of Theano had a silent bug with GpuSum pattern 01,011 and 0111 when the first dimensions was bigger then 4096. Was fixed 31 may 2010 + Value: False + + warn.sum_sum_bug () + Doc: Warn if we are in a case where Theano version between version 9923a40c7b7a and the 2 august 2010 (fixed date), generated an error in that case. This happens when there are 2 consecutive sums in the graph, bad code was generated. Was fixed 2 August 2010 + Value: False + + warn.sum_div_dimshuffle_bug () + Doc: Warn if previous versions of Theano (between rev. 
3bd9b789f5e8, 2010-06-16, and cfc6322e5ad4, 2010-08-03) would have given incorrect result. This bug was triggered by sum of division of dimshuffled tensors. + Value: False + + warn.subtensor_merge_bug () + Doc: Warn if previous versions of Theano (before 0.5rc2) could have given incorrect results when indexing into a subtensor with negative stride (for instance, for instance, x[a:b:-1][c]). + Value: False + + warn.gpu_set_subtensor1 () + Doc: Warn if previous versions of Theano (before 0.6) could have given incorrect results when moving to the gpu set_subtensor(x[int vector], new_value) + Value: False + + warn.vm_gc_bug () + Doc: There was a bug that existed in the default Theano configuration, only in the development version between July 5th 2012 and July 30th 2012. This was not in a released version. If your code was affected by this bug, a warning will be printed during the code execution if you use the `linker=vm,vm.lazy=True,warn.vm_gc_bug=True` Theano flags. This warning is disabled by default as the bug was not released. + Value: False + + warn.signal_conv2d_interface () + Doc: Warn we use the new signal.conv2d() when its interface changed mid June 2014 + Value: True + + warn.reduce_join () + Doc: Your current code is fine, but Theano versions prior to 0.7 (or this development version) might have given an incorrect result. To disable this warning, set the Theano flag warn.reduce_join to False. The problem was an optimization, that modified the pattern "Reduce{scalar.op}(Join(axis=0, a, b), axis=0)", did not check the reduction axis. So if the reduction axis was not 0, you got a wrong answer. + Value: True + + warn.inc_set_subtensor1 () + Doc: Warn if previous versions of Theano (before 0.7) could have given incorrect results for inc_subtensor and set_subtensor when using some patterns of advanced indexing (indexing with one vector or matrix of ints). 
+ Value: True + + compute_test_value (('off', 'ignore', 'warn', 'raise', 'pdb')) + Doc: If 'True', Theano will run each op at graph build time, using Constants, SharedVariables and the tag 'test_value' as inputs to the function. This helps the user track down problems in the graph before it gets optimized. + Value: off + + print_test_value () + Doc: If 'True', the __eval__ of a Theano variable will return its test_value when this is available. This has the practical conseguence that, e.g., in debugging `my_var` will print the same as `my_var.tag.test_value` when a test value is defined. + Value: False + + compute_test_value_opt (('off', 'ignore', 'warn', 'raise', 'pdb')) + Doc: For debugging Theano optimization only. Same as compute_test_value, but is used during Theano optimization + Value: off + + unpickle_function () + Doc: Replace unpickled Theano functions with None. This is useful to unpickle old graphs that pickled them when it shouldn't + Value: True + + reoptimize_unpickled_function () + Doc: Re-optimize the graph when a theano function is unpickled from the disk. + Value: False + + exception_verbosity (('low', 'high')) + Doc: If 'low', the text of exceptions will generally refer to apply nodes with short names such as Elemwise{add_no_inplace}. If 'high', some exceptions will also refer to apply nodes with long descriptions like: + A. Elemwise{add_no_inplace} + B. log_likelihood_v_given_h + C. log_likelihood_h + Value: low + + openmp () + Doc: Allow (or not) parallel computation on the CPU with OpenMP. This is the default value used when creating an Op that supports OpenMP parallelization. It is preferable to define it via the Theano configuration file ~/.theanorc or with the environment variable THEANO_FLAGS. Parallelization is only done for some operations that implement it, and even for operations that implement parallelism, each operation is free to respect this flag or not. 
You can control the number of threads used with the environment variable OMP_NUM_THREADS. If it is set to 1, we disable openmp in Theano by default. + Value: False + + openmp_elemwise_minsize () + Doc: If OpenMP is enabled, this is the minimum size of vectors for which the openmp parallelization is enabled in element wise ops. + Value: 200000 + + check_input () + Doc: Specify if types should check their input in their C code. It can be used to speed up compilation, reduce overhead (particularly for scalars) and reduce the number of generated C files. + Value: True + + cache_optimizations () + Doc: WARNING: work in progress, does not work yet. Specify if the optimization cache should be used. This cache will any optimized graph and its optimization. Actually slow downs a lot the first optimization, and could possibly still contains some bugs. Use at your own risks. + Value: False + + unittests.rseed () + Doc: Seed to use for randomized unit tests. Special value 'random' means using a seed of None. + Value: 666 + + compile.wait () + Doc: Time to wait before retrying to aquire the compile lock. + Value: 5 + + compile.timeout () + Doc: In seconds, time that a process will wait before deciding to + override an existing lock. An override only happens when the existing + lock is held by the same owner *and* has not been 'refreshed' by this + owner for more than this period. Refreshes are done every half timeout + period for running processes. + Value: 120 + + compiledir_format () + Doc: Format string for platform-dependent compiled module subdirectory + (relative to base_compiledir). Available keys: gxx_version, hostname, + numpy_version, platform, processor, python_bitwidth, + python_int_bitwidth, python_version, short_platform, theano_version. + Defaults to 'compiledir_%(short_platform)s-%(processor)s-%(python_vers + ion)s-%(python_bitwidth)s'. 
+ Value: compiledir_%(short_platform)s-%(processor)s-%(python_version)s-%(python_bitwidth)s + + + Doc: platform-independent root directory for compiled modules + Value: /home/lijin/.theano + + + Doc: platform-dependent cache directory for compiled modules + Value: /home/lijin/.theano/compiledir_Linux-3.13--generic-x86_64-with-Ubuntu-14.04-trusty-x86_64-2.7.6-64 + + cmodule.mac_framework_link () + Doc: If set to True, breaks certain MacOS installations with the infamous Bus Error + Value: False + + cmodule.warn_no_version () + Doc: If True, will print a warning when compiling one or more Op with C code that can't be cached because there is no c_code_cache_version() function associated to at least one of those Ops. + Value: False + + cmodule.remove_gxx_opt () + Doc: If True, will remove the -O* parameter passed to g++.This is useful to debug in gdb modules compiled by Theano.The parameter -g is passed by default to g++ + Value: False + + cmodule.compilation_warning () + Doc: If True, will print compilation warnings. + Value: False + + cmodule.preload_cache () + Doc: If set to True, will preload the C module cache at import time + Value: False + + gcc.cxxflags () + Doc: Extra compiler flags for gcc + Value: + + metaopt.verbose () + Doc: Enable verbose output for meta optimizers + Value: False + + optdb.position_cutoff () + Doc: Where to stop eariler during optimization. It represent the position of the optimizer where to stop. + Value: inf + + optdb.max_use_ratio () + Doc: A ratio that prevent infinite loop in EquilibriumOptimizer. + Value: 5.0 + + profile () + Doc: If VM should collect profile information + Value: False + + profile_optimizer () + Doc: If VM should collect optimizer profile information + Value: False + + profile_memory () + Doc: If VM should collect memory profile information and print it + Value: False + + + Doc: Useful only for the vm linkers. When lazy is None, auto detect if lazy evaluation is needed and use the apropriate version. 
If lazy is True/False, force the version used between Loop/LoopGC and Stack. + Value: None + + optimizer_excluding () + Doc: When using the default mode, we will remove optimizer with these tags. Separate tags with ':'. + Value: + + optimizer_including () + Doc: When using the default mode, we will add optimizer with these tags. Separate tags with ':'. + Value: + + optimizer_requiring () + Doc: When using the default mode, we will require optimizer with these tags. Separate tags with ':'. + Value: + + DebugMode.patience () + Doc: Optimize graph this many times to detect inconsistency + Value: 10 + + DebugMode.check_c () + Doc: Run C implementations where possible + Value: True + + DebugMode.check_py () + Doc: Run Python implementations where possible + Value: True + + DebugMode.check_finite () + Doc: True -> complain about NaN/Inf results + Value: True + + DebugMode.check_strides () + Doc: Check that Python- and C-produced ndarrays have same strides. On difference: (0) - ignore, (1) warn, or (2) raise error + Value: 0 + + DebugMode.warn_input_not_reused () + Doc: Generate a warning when destroy_map or view_map says that an op works inplace, but the op did not reuse the input for its output. + Value: True + + DebugMode.check_preallocated_output () + Doc: Test thunks with pre-allocated memory as output storage. This is a list of strings separated by ":". Valid values are: "initial" (initial storage in storage map, happens with Scan),"previous" (previously-returned memory), "c_contiguous", "f_contiguous", "strided" (positive and negative strides), "wrong_size" (larger and smaller dimensions), and "ALL" (all of the above). + Value: + + DebugMode.check_preallocated_output_ndim () + Doc: When testing with "strided" preallocated output memory, test all combinations of strides over that number of (inner-most) dimensions. You may want to reduce that number to reduce memory or time usage, but it is advised to keep a minimum of 2. 
+ Value: 4 + + profiling.time_thunks () + Doc: Time individual thunks when profiling + Value: True + + profiling.n_apply () + Doc: Number of Apply instances to print by default + Value: 20 + + profiling.n_ops () + Doc: Number of Ops to print by default + Value: 20 + + profiling.output_line_width () + Doc: Max line width for the profiling output + Value: 512 + + profiling.min_memory_size () + Doc: For the memory profile, do not print Apply nodes if the size + of their outputs (in bytes) is lower than this threshold + Value: 1024 + + profiling.min_peak_memory () + Doc: The min peak memory usage of the order + Value: False + + profiling.destination () + Doc: + File destination of the profiling output + + Value: stderr + + profiling.debugprint () + Doc: + Do a debugprint of the profiled functions + + Value: False + + ProfileMode.n_apply_to_print () + Doc: Number of apply instances to print by default + Value: 15 + + ProfileMode.n_ops_to_print () + Doc: Number of ops to print by default + Value: 20 + + ProfileMode.min_memory_size () + Doc: For the memory profile, do not print apply nodes if the size of their outputs (in bytes) is lower then this threshold + Value: 1024 + + ProfileMode.profile_memory () + Doc: Enable profiling of memory used by Theano functions + Value: False + + on_shape_error (('warn', 'raise')) + Doc: warn: print a warning and use the default value. raise: raise an error + Value: warn + + tensor.insert_inplace_optimizer_validate_nb () + Doc: -1: auto, if graph have less then 500 nodes 1, else 10 + Value: -1 + + experimental.local_alloc_elemwise () + Doc: DEPRECATED: If True, enable the experimental optimization local_alloc_elemwise. Generates error if not True. Use optimizer_excluding=local_alloc_elemwise to dsiable. + Value: True + + experimental.local_alloc_elemwise_assert () + Doc: When the local_alloc_elemwise is applied, add an assert to highlight shape errors. 
+ Value: True + + blas.ldflags () + Doc: lib[s] to include for [Fortran] level-3 blas implementation + Value: -lblas + + warn.identify_1pexp_bug () + Doc: Warn if Theano versions prior to 7987b51 (2011-12-18) could have yielded a wrong result due to a bug in the is_1pexp function + Value: False + + scan.allow_gc () + Doc: Allow/disallow gc inside of Scan (default: False) + Value: False + + scan.allow_output_prealloc () + Doc: Allow/disallow memory preallocation for outputs inside of scan (default: True) + Value: True + + pycuda.init () + Doc: If True, always initialize PyCUDA when Theano want to + initilize the GPU. Currently, we must always initialize + PyCUDA before Theano do it. Setting this flag to True, + ensure that, but always import PyCUDA. It can be done + manually by importing theano.misc.pycuda_init before theano + initialize the GPU device. + + Value: False + + cublas.lib () + Doc: Name of the cuda blas library for the linker. + Value: cublas + + lib.cnmem () + Doc: Do we enable CNMeM or not (a faster CUDA memory allocator). + + The parameter represent the start size (in MB or % of + total GPU memory) of the memory pool. + + 0: not enabled. + 0 < N <= 1: % of the total GPU memory (clipped to .985 for driver memory) + > 0: use that number of MB of memory. 
+ + + Value: 0.0 + + + + + Using gpu device 1: Tesla K10.G2.8GB (CNMeM is disabled) + + +这些配置影响着 `theano` 的运行,很多的参数都是只读的,因此,**我们应当尽量避免在程序中直接修改这些参数**。 + +大部分参数都有指定的默认值,我们可以在 `.theanorc` 文件中对配置进行修改,也可以在环境变量 `THEANO_FLAGS` 中进行修改,它们的优先级顺序如下: + +- 首先是对 `theano.config.<property>` 的赋值 +- 然后是 `THEANO_FLAGS` 环境变量指定的内容 +- 最后是 `.theanorc` 文件或者 `THEANORC` 环境变量所指示的文件中的内容 + +具体的参数含义可以参考: + +http://deeplearning.net/software/theano/library/config.html + +### 环境变量 THEANO_FLAGS + +使用 `THEANO_FLAGS` 环境变量,运行程序的方法如下: + + THEANO_FLAGS='floatX=float32,device=gpu0,nvcc.fastmath=True' python <myscript>.py + +如果是 `Windows` 下,则需要进行稍微的改动: + + set THEANO_FLAGS='floatX=float32,device=gpu0,nvcc.fastmath=True' && python <myscript>.py + +示例中的配置将浮点数的精度设为了 `32` 位,并将使用 `GPU 0` 和 `CUDA` 的 `fastmath` 模式进行编译和运算。 + +### 配置文件 THEANORC + +环境变量 `THEANORC` 的默认位置为 `$HOME/.theanorc` (`windows` 下为 `$HOME/.theanorc:$HOME/.theanorc.txt`)。 + +与前面 `THEANO_FLAGS` 指定的内容相同的配置文件为: + + [global] + floatX = float32 + device = gpu0 + + [nvcc] + fastmath = True + + + +这里 `[global]` 对应的是 `config` 中的参数,如 `config.device, config.mode`; `config` 的子模块中的参数,如 `config.nvcc.fastmath, config.blas.ldflags` 则需要用 `[nvcc], [blas]` 的部分去设置。 + +## 模式 + +每次调用 `theano.function` 的时候,那些符号变量之间的结构会被优化和计算,而优化和计算的模式都是由 `config.mode` 所决定的。 + +`Theano` 中定义了这四种模式: + +- `FAST_COMPILE` + - `compile.mode.Mode(linker='py', optimizer='fast_compile')` + - `Python` 实现,构造很快,运行慢 +- `FAST_RUN` + - `compile.mode.Mode(linker='cvm', optimizer='fast_run')` + - `C` 实现,构造较慢,运行快 +- `DebugMode` + - `compile.debugmode.DebugMode()` + - 调试模式,两种实现都可以 +- `ProfileMode` + - `compile.profilemode.ProfileMode()` + - `C` 实现,已经停用,使用 `theano.profile` 替代 + +更多的细节,可以参考: + +http://deeplearning.net/software/theano/library/compile/mode.html#libdoc-compile-mode + +### Linkers + +从上面的定义可以看出,一个模式由两部分构成,`optimizer` 和 `linker`, `ProfileMode` 和 `DebugMode` 模式使用自带的 `linker`。 + +可用的 `linker` 可以从下表中查看: + +http://deeplearning.net/software/theano/tutorial/modes.html#linkers + +### 使用 DebugMode + +一般在使用 `FAST_RUN` 或者 
`FAST_COMPILE` 模式之前,最好先用 `DebugMode` 进行调试,不过速度会比前两个模式慢得多。 + +我们用一个实例看一下两者的区别: + + +```python +x = T.dvector('x') + +f_1 = theano.function([x], 10 / x) + +print f_1([5]) +print f_1([0]) +print f_1([7]) +``` + + [ 2.] + [ inf] + [ 1.42857143] + + +在非 Debug 模式下,除以 0 是合法的,但是在 `DebugMode` 下,会给出错误,帮助我们进行调试: + + +```python +f_2 = theano.function([x], 10 / x, mode='DebugMode') + +print f_2([5]) +print f_2([0]) +print f_2([7]) +``` + + [ 2.] + + + + --------------------------------------------------------------------------- + + InvalidValueError Traceback (most recent call last) + + in () + 2 + 3 print f_2([5]) + ----> 4 print f_2([0]) + 5 print f_2([7]) + + + /usr/local/lib/python2.7/dist-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs) + 857 t0_fn = time.time() + 858 try: + --> 859 outputs = self.fn() + 860 except Exception: + 861 if hasattr(self.fn, 'position_of_error'): + + + /usr/local/lib/python2.7/dist-packages/theano/compile/debugmode.pyc in deco() + 2339 self.maker.mode.check_isfinite + 2340 try: + -> 2341 return f() + 2342 finally: + 2343 # put back the filter_checks_isfinite + + + /usr/local/lib/python2.7/dist-packages/theano/compile/debugmode.pyc in f() + 2079 raise InvalidValueError(r, storage_map[r][0], + 2080 hint='perform output', + -> 2081 specific_hint=hint2) + 2082 warn_inp = config.DebugMode.warn_input_not_reused + 2083 py_inplace_outs = _check_inputs( + + + InvalidValueError: InvalidValueError + type(variable) = TensorType(float64, vector) + variable = Elemwise{true_div,no_inplace}.0 + type(value) = + dtype(value) = float64 + shape(value) = (1,) + value = [ inf] + min(value) = inf + max(value) = inf + isfinite = False + client_node = None + hint = perform output + specific_hint = non-finite elements not allowed + context = ... 
+ Elemwise{true_div,no_inplace} [id A] '' + |TensorConstant{(1,) of 10.0} [id B] + |x [id C] + + + + +更多细节可以参考: + +http://deeplearning.net/software/theano/library/compile/debugmode.html#debugmode diff --git a/docs/09-theano/09.06-conditions-in-theano.md b/docs/09-theano/09.06-conditions-in-theano.md new file mode 100644 index 00000000..8a332841 --- /dev/null +++ b/docs/09-theano/09.06-conditions-in-theano.md @@ -0,0 +1,91 @@ + +# Theano 条件语句 + +`theano` 中提供了两种条件语句,`ifelse` 和 `switch`,两者都是用于在符号变量上使用条件语句: + +- `ifelse(condition, var1, var2)` + - 如果 `condition` 为 `true`,返回 `var1`,否则返回 `var2` +- `switch(tensor, var1, var2)` + - Elementwise `ifelse` 操作,更一般化 +- `switch` 会计算两个输出,而 `ifelse` 只会根据给定的条件,计算相应的输出。 + +`ifelse` 需要从 `theano.ifelse` 中导入,而 `switch` 在 `theano.tensor` 模块中。 + + +```python +import theano, time +import theano.tensor as T +import numpy as np +from theano.ifelse import ifelse +``` + + Using gpu device 1: Tesla K10.G2.8GB (CNMeM is disabled) + + +假设我们有两个标量参数:$a, b$,和两个矩阵 $\mathbf{x, y}$,定义函数为: + +$$ +\mathbf z = f(a, b,\mathbf{x, y}) = \left\{ +\begin{aligned} + \mathbf x & ,\ a < b\\ + \mathbf y & ,\ a \geq b +\end{aligned} +\right. +$$ + +定义变量: + + +```python +a, b = T.scalars('a', 'b') +x, y = T.matrices('x', 'y') +``` + +用 `ifelse` 构造,小于用 `T.lt()`,大于用 `T.gt()`(小于等于和大于等于分别是 `T.le()` 和 `T.ge()`): + + +```python +z_ifelse = ifelse(T.lt(a, b), x, y) + +f_ifelse = theano.function([a, b, x, y], z_ifelse) +``` + +用 `switch` 构造: + + +```python +z_switch = T.switch(T.lt(a, b), x, y) + +f_switch = theano.function([a, b, x, y], z_switch) +``` + +测试数据: + + +```python +val1 = 0. +val2 = 1. 
+big_mat1 = np.ones((10000, 1000), dtype=theano.config.floatX) +big_mat2 = np.ones((10000, 1000), dtype=theano.config.floatX) +``` + +比较两者的运行速度: + + +```python +n_times = 10 + +tic = time.clock() +for i in xrange(n_times): + f_switch(val1, val2, big_mat1, big_mat2) +print 'time spent evaluating both values %f sec' % (time.clock() - tic) + +tic = time.clock() +for i in xrange(n_times): + f_ifelse(val1, val2, big_mat1, big_mat2) +print 'time spent evaluating one value %f sec' % (time.clock() - tic) +``` + + time spent evaluating both values 0.638598 sec + time spent evaluating one value 0.461249 sec + diff --git a/docs/09-theano/09.07-loop-with-scan.md b/docs/09-theano/09.07-loop-with-scan.md new file mode 100644 index 00000000..16bf6817 --- /dev/null +++ b/docs/09-theano/09.07-loop-with-scan.md @@ -0,0 +1,522 @@ + +# Theano 循环:scan(详解) + + +```python +import theano, time +import theano.tensor as T +import numpy as np + +def floatX(X): + return np.asarray(X, dtype=theano.config.floatX) +``` + + Using gpu device 1: Tesla C2075 (CNMeM is disabled) + + +`theano` 中可以使用 `scan` 进行循环,常用的 `map` 和 `reduce` 操作都可以看成是 `scan` 的特例。 + +`scan` 通常作用在一个序列上,每次处理一个输入,并输出一个结果。 + +`sum(x)` 函数可以看成是 `z + x(i)` 函数在给定 `z = 0` 的情况下,对 `x` 的一个 `scan`。 + +通常我们可以将一个 `for` 循环表示成一个 `scan` 操作,其好处如下: + +- 迭代次数成为符号图结构的一部分 +- 最小化 GPU 数据传递 +- 序列化梯度计算 +- 速度比 `for` 稍微快一些 +- 降低内存使用 + +## scan 的使用 + +函数的用法如下: + + theano.scan(fn, + sequences=None, + outputs_info=None, + non_sequences=None, + n_steps=None, + truncate_gradient=-1, + go_backwards=False, + mode=None, + name=None, + profile=False, + allow_gc=None, + strict=False) + +主要参数的含义: + +- `fn` + - 一步 `scan` 所进行的操作 +- `sequences` + - 输入的序列 +- `outputs_info` + - 前一步输出结果的初始状态 +- `non_sequences` + - 非序列参数 +- `n_steps` + - 迭代步数 +- `go_backwards` + - 是否从后向前遍历 + +输出为一个元组 `(outputs, updates)`: + +- `outputs` + - 从初始状态开始,每一步 `fn` 的输出结果 +- `updates` + - 一个字典,用来记录 `scan` 过程中用到的共享变量更新规则,构造函数的时候,如果需要更新共享变量,将这个变量当作 `updates` 的参数传入。 + +## scan 和 map + +这里实现一个简单的 `map` 
操作,将向量 $\mathbf x$ 中的所有元素变成原来的两倍: + +```python +map(lambda t: t * 2, x) +``` + + +```python +x = T.vector() + +results, _ = theano.scan(fn = lambda t: t * 2, + sequences = x) +x_double_scan = theano.function([x], results) + +print x_double_scan(range(10)) +``` + + [ 0. 2. 4. 6. 8. 10. 12. 14. 16. 18.] + + +之前我们说到,`theano` 中的 `map` 是 `scan` 的一个特例,因此 `theano.map` 的用法其实跟 `theano.scan` 十分类似。 + +由于不需要考虑前一步的输出结果,所以 `theano.map` 的参数中没有 `outputs_info` 这一部分。 + +我们用 `theano.map` 实现相同的效果: + + +```python +result, _ = theano.map(fn = lambda t: t * 2, + sequences = x) +x_double_map = theano.function([x], result) + +print x_double_map(range(10)) +``` + + [ 0. 2. 4. 6. 8. 10. 12. 14. 16. 18.] + + +## scan 和 reduce + +这里实现一个简单的 `reduce` 操作,求和: + +```python +reduce(lambda a, b: a + b, x) +``` + + +```python +result, _ = theano.scan(fn = lambda t, v: t + v, + sequences = x, + outputs_info = floatX(0.)) + +# 因为每一步的输出值都会被记录到最后的 result 中,所以最后的和是 result 的最后一个元素。 +x_sum_scan = theano.function([x], result[-1]) + +# 计算 0 + 1 + ... + 9 +print x_sum_scan(range(10)) +``` + + 45.0 + + +`theano.reduce` 也是 `scan` 的一个特例,使用 `theano.reduce` 实现相同的效果: + + +```python +result, _ = theano.reduce(fn = lambda t, v: t + v, + sequences = x, + outputs_info = 0.) + +x_sum_reduce = theano.function([x], result) + +# 计算 0 + 1 + ... + 9 +print x_sum_reduce(range(10)) +``` + + 45.0 + + +`reduce` 与 `scan` 不同的地方在于,`result` 包含的内容并不是每次输出的结果,而是最后一次输出的结果。 + +## scan 的使用 + +### 输入与输出 + +`fn` 是一个函数句柄,对于这个函数句柄,它每一步接受的参数是由 `sequences, outputs_info, non_sequences` 这三个参数所决定的,并且按照以下的顺序排列: + +- `sequences` 中第一个序列的值 +- ... +- `sequences` 中最后一个序列的值 +- `outputs_info` 中第一个输出之前的值 +- ... 
+- `outputs_info` 中最后一个输出之前的值 +- `non_sequences` 中的参数 + +这些序列的顺序与在参数 `sequences, outputs_info` 中指定的顺序相同。 + +默认情况下,在第 `k` 次迭代时,如果 `sequences` 和 `outputs_info` 中给定的值不是字典(`dictionary`)或者一个字典列表(`list of dictionaries`),那么 + +- `sequences` 中的序列 `seq` 传入 `fn` 的是 `seq[k]` 的值 +- `outputs_info` 中的序列 `output` 传入 `fn` 的是 `output[k-1]` 的值 + +`fn` 的返回值有两部分 `(outputs_list, update_dictionary)`,第一部分将作为序列,传入 `outputs` 中,与 `outputs_info` 中的**初始输入值的维度一致**(如果没有给定 `outputs_info` ,输出值可以任意。) + +第二部分则是更新规则的字典,告诉我们如何对 `scan` 中使用到的一些共享的变量进行更新: +```python +return [y1_t, y2_t], {x:x+1} +``` + +这两部分可以任意,即顺序既可以是 `(outputs_list, update_dictionary)`, 也可以是 `(update_dictionary, outputs_list)`,`theano` 会根据类型自动识别。 + +两部分只需要有一个存在即可,另一个可以为空。 + +### 例子分析 + +例如,在我们的第一个例子中 + +```python +theano.scan(fn = lambda t: t * 2, + sequences = x) +``` + +在第 `k` 次迭代的时候,传入参数 `t` 的值为 `x[k]`。 + +再如,在我们的第二个例子中: + +```python +theano.scan(fn = lambda t, v: t + v, + sequences = x, + outputs_info = floatX(0.)) +``` + +`fn` 接受了两个参数,初始迭代时,按照规则,`t` 接受的参数为 `x[0]`,`v` 接受的参数为我们传入 `outputs_info` 的第一个初始值即 `0` (认为是 `outputs[-1]`),他们的结果 `t+v` 将作为 `outputs[0]` 的值传入下一次迭代以及最终 `scan` 输出的 `outputs` 值中。 + +### 输入多个序列 + +我们可以一次输入多个序列,这些序列会按照顺序传入 fn 的参数中,例如计算多项式 +$$ +\sum_{n=0}^N a_n x^ n +$$ +时,我们可以将多项式的系数和幂数两个序列放到一个 `list` 中作为输入参数: + + +```python +# 变量 x +x = T.scalar("x") + +# 不为 0 的系数 +A = T.vectors("A") + +# 对应的幂数 +N = T.ivectors("N") + +# a 对应的是 A, n 对应 N,v 对应 x +components, _ = theano.scan(fn = lambda a, n, v: a * (v ** n), + sequences = [A, N], + non_sequences = x) + +result = components.sum() + +polynomial = theano.function([x, A, N], result) + +# 计算 1 + 3 * 10 ^ 2 + 2 * 10^3 = 2301 +print polynomial(floatX(10), + floatX([1, 3, 2]), + [0, 2, 3]) +``` + + 2301.0 + + +### 使用序列的多个值 + +默认情况下,我们只能使用输入序列的当前时刻的值,以及前一个输出的输出值。 + +事实上,`theano` 会将参数中的序列变成一个有 `input` 和 `taps` 两个键值的 `dict`: + +- `input`:输入的序列 +- `taps`:要传入 `fn` 的值的列表 + - 对于 `sequences` 参数中的序列来说,默认值为 [0],表示时间 `t` 传入 `t+0` 时刻的序列值,可以为正,可以为负。 + - 对于 `outputs_info` 参数中的序列来说,默认值为 
[-1],表示时间 `t` 传入 `t-1` 时刻的序列值,只能为负值,如果值为 `None`,表示这个输出结果不会作为参数传入 `fn` 中。 + +传入 `fn` 的参数也会按照 `taps` 中的顺序来排列,我们考虑下面这个例子: +```python +scan(fn, sequences = [ dict(input= Sequence1, taps = [-3,2,-1]) + , Sequence2 + , dict(input = Sequence3, taps = 3) ] + , outputs_info = [ dict(initial = Output1, taps = [-3,-5]) + , dict(initial = Output2, taps = None) + , Output3 ] + , non_sequences = [ Argument1, Argument2]) +``` +首先是 `Sequence1` 的 `[-3, 2, -1]` 被传入,然后 `Sequence2` 不是 `dict`, 所以传入默认值 `[0]`,`Sequence3` 传入的参数是 `3`,所以 `fn` 在第 `t` 步接受的前几个参数是: +``` +Sequence1[t-3] +Sequence1[t+2] +Sequence1[t-1] +Sequence2[t] +Sequence3[t+3] +``` + +然后 `Output1` 传入的是 `[-3, -5]`(**传入的初始值的形状应为 `shape (5,)+`**),`Output2` 不作为参数传入,`Output3` 传入的是 `[-1]`,所以接下来的参数是: +``` +Output1[t-3] +Output1[t-5] +Output3[t-1] +Argument1 +Argument2 +``` + +总的说来上面的例子中,`fn` 函数按照以下顺序最多接受这样 10 个参数: +``` +Sequence1[t-3] +Sequence1[t+2] +Sequence1[t-1] +Sequence2[t] +Sequence3[t+3] +Output1[t-3] +Output1[t-5] +Output3[t-1] +Argument1 +Argument2 +``` + +例子,假设 $x$ 是我们的输入,$y$ 是我们的输出,我们需要计算 $y(t) = \tanh\left[W_{1} y(t-1) + W_{2} x(t) + W_{3} x(t-1)\right]$ 的值: + + +```python +X = T.matrix("X") +Y = T.vector("y") + +W_1 = T.matrix("W_1") +W_2 = T.matrix("W_2") +W_3 = T.matrix("W_3") + +# W_1、W_2 和 W_3 作为不变的参数可以直接使用 +results, _ = theano.scan(fn = lambda x, x_pre, y: T.tanh(T.dot(W_1, y) + T.dot(W_2, x) + T.dot(W_3, x_pre)), + # 0 对应 x,-1 对应 x_pre + sequences = dict(input=X, taps=[0, -1]), + outputs_info = Y) + +Y_seq = theano.function(inputs = [X, Y, W_1, W_2, W_3], + outputs = results) +``` + +测试小矩阵计算: + + +```python +# 测试 +t = 1001 +x_dim = 10 +y_dim = 20 + +x = 2 * floatX(np.random.random([t, x_dim])) - 1 +y = 2 * floatX(np.zeros(y_dim)) - 1 +w_1 = 2 * floatX(np.random.random([y_dim, y_dim])) - 1 +w_2 = 2 * floatX(np.random.random([y_dim, x_dim])) - 1 +w_3 = 2 * floatX(np.random.random([y_dim, x_dim])) - 1 + +tic = time.time() + +y_res_theano = Y_seq(x, y, w_1, w_2, w_3) + +print "theano running time {:.4f} 
s".format(time.time() - tic) + +tic = time.time() +# 与 numpy 的结果进行比较: +y_res_numpy = np.zeros([t, y_dim]) +y_res_numpy[0] = y + +for i in range(1, t): + y_res_numpy[i] = np.tanh(w_1.dot(y_res_numpy[i-1]) + w_2.dot(x[i]) + w_3.dot(x[i-1])) + +print "numpy running time {:.4f} s".format(time.time() - tic) + +# 这里要从 1 开始,因为使用了 x(t-1),所以 scan 从第 1 个位置开始计算 +print "the max difference of the first 10 results is", np.max(np.abs(y_res_theano[0:10] - y_res_numpy[1:11])) +``` + + theano running time 0.0537 s + numpy running time 0.0197 s + the max difference of the first 10 results is 1.25780650354e-06 + + +测试大矩阵运算: + + +```python +# 测试 +t = 1001 +x_dim = 100 +y_dim = 200 + +x = 2 * floatX(np.random.random([t, x_dim])) - 1 +y = 2 * floatX(np.zeros(y_dim)) - 1 +w_1 = 2 * floatX(np.random.random([y_dim, y_dim])) - 1 +w_2 = 2 * floatX(np.random.random([y_dim, x_dim])) - 1 +w_3 = 2 * floatX(np.random.random([y_dim, x_dim])) - 1 + +tic = time.time() + +y_res_theano = Y_seq(x, y, w_1, w_2, w_3) + +print "theano running time {:.4f} s".format(time.time() - tic) + +tic = time.time() +# 与 numpy 的结果进行比较: +y_res_numpy = np.zeros([t, y_dim]) +y_res_numpy[0] = y + +for i in range(1, t): + y_res_numpy[i] = np.tanh(w_1.dot(y_res_numpy[i-1]) + w_2.dot(x[i]) + w_3.dot(x[i-1])) + +print "numpy running time {:.4f} s".format(time.time() - tic) + +# 这里要从 1 开始,因为使用了 x(t-1),所以 scan 从第 1 个位置开始计算 +print "the max difference of the first 10 results is", np.max(np.abs(y_res_theano[:10] - y_res_numpy[1:11])) +``` + + theano running time 0.0754 s + numpy running time 0.1334 s + the max difference of the first 10 results is 0.000656997077348 + + +值得注意的是,由于 `theano` 和 `numpy` 在某些计算的实现上存在一定的差异,随着序列长度的增加,这些差异将被放大: + + +```python +for i in xrange(20): + print "iter {:03d}, max diff:{:.6f}".format(i + 1, + np.max(np.abs(y_res_numpy[i + 1,:] - y_res_theano[i,:]))) +``` + + iter 001, max diff:0.000002 + iter 002, max diff:0.000005 + iter 003, max diff:0.000007 + iter 004, max diff:0.000010 + iter 005, max 
diff:0.000024 + iter 006, max diff:0.000049 + iter 007, max diff:0.000113 + iter 008, max diff:0.000145 + iter 009, max diff:0.000334 + iter 010, max diff:0.000657 + iter 011, max diff:0.001195 + iter 012, max diff:0.002778 + iter 013, max diff:0.004561 + iter 014, max diff:0.004748 + iter 015, max diff:0.014849 + iter 016, max diff:0.012696 + iter 017, max diff:0.043639 + iter 018, max diff:0.046540 + iter 019, max diff:0.083032 + iter 020, max diff:0.123678 + + +### 控制循环次数 + +假设我们要计算方阵$A$的$A^k$,$k$ 是一个未知变量,我们可以这样通过 `n_steps` 参数来控制循环计算的次数: + + +```python +A = T.matrix("A") +k = T.iscalar("k") + +results, _ = theano.scan(fn = lambda P, A: P.dot(A), + # 初始值设为单位矩阵 + outputs_info = T.eye(A.shape[0]), + # 乘 k 次 + non_sequences = A, + n_steps = k) + +A_k = theano.function(inputs = [A, k], outputs = results[-1]) + +test_a = floatX([[2, -2], [-1, 2]]) + +print A_k(test_a, 10) + +# 使用 numpy 进行验证 +a_k = np.eye(2) +for i in range(10): + a_k = a_k.dot(test_a) + +print a_k +``` + + [[ 107616. -152192.] + [ -76096. 107616.]] + [[ 107616. -152192.] + [ -76096. 
107616.]] + + +### 使用共享变量 + +可以在 `scan` 中使用并更新共享变量,例如,利用共享变量 `n`,我们可以实现这样一个迭代 `k` 步的简单计数器: + + +```python +n = theano.shared(floatX(0)) +k = T.iscalar("k") + +# 这里 lambda 的返回值是一个 dict,因此这个值会被传入 updates 中 +_, updates = theano.scan(fn = lambda n: {n:n+1}, + non_sequences = n, + n_steps = k) + +counter = theano.function(inputs = [k], + outputs = [], + updates = updates) + +print n.get_value() +counter(10) +print n.get_value() +counter(10) +print n.get_value() +``` + + 0.0 + 10.0 + 20.0 + + +之前说到,`fn` 函数的返回值应该是 `(outputs_list, update_dictionary)` 或者 `(update_dictionary, outputs_list)` 或者两者之一。 + +这里 `fn` 函数返回的是一个字典,因此自动被放入了 `update_dictionary` 中,然后传入 `function` 的 `updates` 参数中进行迭代。 + +### 使用条件语句结束循环 + +我们可以将 `scan` 设计为 `loop-until` 的模式,具体方法是在 `scan` 中,将 `fn` 的返回值增加一个参数,使用 `theano.scan_module.until` 来设置停止条件。 + +假设我们要计算所有不大于某个值的 2 的幂,我们可以这样定义: + + +```python +max_value = T.scalar() + +results, _ = theano.scan(fn = lambda v_pre, max_v: (v_pre * 2, theano.scan_module.until(v_pre * 2 > max_v)), + outputs_info = T.constant(1.), + non_sequences = max_value, + n_steps = 1000) + +# 注意,这里不能取 results 的全部 +# 例如在输入值为 40 时,最后的输出可以看成 (64, True) +# scan 发现停止条件满足,停止循环,但是不影响 64 被输出到 results 中,因此要将 64 去掉 +power_of_2 = theano.function(inputs = [max_value], outputs = results[:-1]) + +print power_of_2(40) +``` + + [ 2. 4. 8. 16. 32.] 
+ diff --git a/docs/09-theano/09.08-linear-regression.md b/docs/09-theano/09.08-linear-regression.md new file mode 100644 index 00000000..be4dc010 --- /dev/null +++ b/docs/09-theano/09.08-linear-regression.md @@ -0,0 +1,196 @@ + +# Theano 实例:线性回归 + +## 基本模型 + +在用 `theano` 进行线性回归之前,先回顾一下 `theano` 的运行模式。 + +`theano` 是一个符号计算的数学库,一个基本的 `theano` 结构大致如下: + +- 定义符号变量 +- 编译用符号变量定义的函数,使它能够用这些符号进行数值计算。 +- 将函数应用到数据上去 + + +```python +%matplotlib inline +from matplotlib import pyplot as plt +import numpy as np +import theano +from theano import tensor as T +``` + + Using gpu device 0: GeForce GTX 850M + + +简单的例子:$y = a \times b, a, b \in \mathbb{R}$ + +定义 $a, b, y$: + + +```python +a = T.scalar() +b = T.scalar() + +y = a * b +``` + +编译函数: + + +```python +multiply = theano.function(inputs=[a, b], outputs=y) +``` + +将函数运用到数据上: + + +```python +print multiply(3, 2) # 6 +print multiply(4, 5) # 20 +``` + + 6.0 + 20.0 + + +## 线性回归 + +回到线性回归的模型,假设我们有这样的一组数据: + + +```python +train_X = np.linspace(-1, 1, 101) +train_Y = 2 * train_X + 1 + np.random.randn(train_X.size) * 0.33 +``` + +分布如图: + + +```python +plt.scatter(train_X, train_Y) +plt.show() +``` + + +![png](output_14_0.png) + + +### 定义符号变量 + +我们使用线性回归的模型对其进行模拟: +$$\bar{y} = wx + b$$ + +首先我们定义 $x, y$: + + +```python +X = T.scalar() +Y = T.scalar() +``` + +可以在定义时候直接给变量命名,也可以之后修改变量的名字: + + +```python +X.name = 'x' +Y.name = 'y' +``` + +我们的模型为: + + +```python +def model(X, w, b): + return X * w + b +``` + +在这里我们希望模型得到 $\bar{y}$ 与真实的 $y$ 越接近越好,常用的平方损失函数如下: +$$C = |\bar{y}-y|^2$$ + +有了损失函数,我们就可以使用梯度下降法来迭代参数 $w, b$ 的值,为此,我们将 $w$ 和 $b$ 设成共享变量: + + +```python +w = theano.shared(np.asarray(0., dtype=theano.config.floatX)) +w.name = 'w' +b = theano.shared(np.asarray(0., dtype=theano.config.floatX)) +b.name = 'b' +``` + +定义 $\bar y$: + + +```python +Y_bar = model(X, w, b) + +theano.pp(Y_bar) +``` + + + + + '((x * HostFromGpu(w)) + HostFromGpu(b))' + + + +损失函数及其梯度: + + +```python +cost = T.mean(T.sqr(Y_bar - Y)) +grads = T.grad(cost=cost, wrt=[w, 
b]) +``` + +定义梯度下降规则: + + +```python +lr = 0.01 +updates = [[w, w - grads[0] * lr], + [b, b - grads[1] * lr]] +``` + +### 编译训练模型 + +每运行一次,参数 $w, b$ 的值就更新一次: + + +```python +train_model = theano.function(inputs=[X,Y], + outputs=cost, + updates=updates, + allow_input_downcast=True) +``` + +### 将训练函数应用到数据上 + +训练模型,迭代 100 次: + + +```python +for i in xrange(100): + for x, y in zip(train_X, train_Y): + train_model(x, y) +``` + +显示结果: + + +```python +print w.get_value() # 接近 2 +print b.get_value() # 接近 1 + +plt.scatter(train_X, train_Y) +plt.plot(train_X, w.get_value() * train_X + b.get_value(), 'r') + +plt.show() +``` + + 1.94257426262 + 1.00938093662 + + + +![png](output_37_1.png) + diff --git a/docs/09-theano/09.09-logistic-regression-.md b/docs/09-theano/09.09-logistic-regression-.md new file mode 100644 index 00000000..9480c721 --- /dev/null +++ b/docs/09-theano/09.09-logistic-regression-.md @@ -0,0 +1,252 @@ + +# Theano 实例:Logistic 回归 + + +```python +%matplotlib inline +import numpy as np +import matplotlib.pyplot as plt +import theano +import theano.tensor as T +``` + + Using gpu device 0: GeForce GTX 850M + + +## sigmoid 函数 + +一个 `logistic` 曲线由 `sigmoid` 函数给出: +$$s(x) = \frac{1}{1+e^{-x}}$$ + +我们来定义一个 `elementwise` 的 sigmoid 函数: + + +```python +x = T.matrix('x') +s = 1 / (1 + T.exp(-x)) +sigmoid = theano.function([x], s, allow_input_downcast=True) +``` + +这里 `allow_input_downcast=True` 的作用是允许输入 `downcast` 成定义的输入类型: + + +```python +sigmoid([[ 0, 1], + [-1,-2]]) +``` + + + + + array([[ 0.5 , 0.7310586 ], + [ 0.26894143, 0.11920293]], dtype=float32) + + + +其图像如下所示: + + +```python +X = np.linspace(-6, 6, 100) +X = X[np.newaxis,:] + +plt.figure(figsize=(12,5)) + +plt.plot(X.flatten(), sigmoid(X).flatten(), linewidth=2) + +# 美化图像的操作 +#========================= +plt.grid('on') +plt.yticks([0,0.5,1]) + +ax = plt.gca() +ax.spines['right'].set_color('none') +ax.spines['top'].set_color('none') + +ax.yaxis.set_ticks_position('left') +ax.spines['left'].set_position(('data', 
0)) + +plt.legend([r'$s(x)=\frac{1}{1+e^{-x}}$'], loc=0, fontsize=20) +#========================= + +plt.show() +``` + + +![png](output_9_0.png) + + +## sigmoid 函数与 tanh 函数的关系 + +`sigmoid` 函数与 `tanh` 之间有如下的转化关系: +$$s(x)=\frac{1}{1+e^{-x}}=\frac{1+\tanh(x/2)}{2}$$ + + +```python +s2 = (1 + T.tanh(x / 2)) / 2 + +sigmoid2 = theano.function([x], s2) + +sigmoid2([[ 0, 1], + [-1,-2]]) +``` + + + + + array([[ 0.5 , 0.7310586 ], + [ 0.26894143, 0.11920291]], dtype=float32) + + + +## logistic 回归 + +简单的二元逻辑回归问题可以这样描述:我们要对数据点 $x = (x_1, ..., x_n)$ 进行 0-1 分类,参数为 $w = (w_1, ..., w_n), b$,我们的假设函数如下: + +$$ +\begin{align} +h_{w,b}(x) & = P(Y=1|X=x) \\ +& = sigmoid(z) \\ +& =\frac{1}{1 + e^{-z}}\\ +\end{align} +$$ + +其中 + +$$ +\begin{align} +z & = x_1w_1 + ... + x_nw_n + b\\ +& = w^T x + b\\ +\end{align} +$$ + +对于一个数据点 $(x, y), y\in \{0,1\}$ 来说,我们的目标是希望 $h_{w,b}(x)$ 的值尽量接近于 $y$。 + +由于数值在 0-1 之间,我们用交叉熵来衡量 $h_{w,b}(x)$ 和 $y$ 的差异: + +$$- y \log(h_{w,b}(x)) - (1-y) \log(1-h_{w,b}(x))$$ + +对于一组数据,我们定义损失函数为所有差异的均值,然后通过梯度下降法来优化损失函数,得到最优的参数 $w, b$。 + +## 实例 + +生成随机数据: + + +```python +rng = np.random + +# 数据大小和规模 +N = 400 +feats = 784 + +# D = (X, Y) +D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2)) +``` + +定义 `theano` 变量: + + +```python +x = T.matrix('x') +y = T.vector('y') + +# 要更新的变量: +w = theano.shared(rng.randn(feats), name='w') +b = theano.shared(0., name='b') +``` + +定义模型: + + +```python +h = 1 / (1 + T.exp(-T.dot(x, w) - b)) +``` + +当 $h > 0.5$ 时,认为该类的标签为 1: + + +```python +prediction = h > 0.5 +``` + +损失函数和梯度: + + +```python +cost = - T.mean(y * T.log(h) + (1 - y) * T.log(1 - h)) + 0.01 * T.sum(w ** 2) # 正则项,防止过拟合 +gw, gb = T.grad(cost, [w, b]) +``` + +编译训练和预测函数: + + +```python +train = theano.function(inputs=[x, y], + outputs=cost, + updates=[[w, w - 0.1 * gw], [b, b - 0.1 * gb]], + allow_input_downcast=True) + +predict = theano.function(inputs=[x], + outputs=prediction, + allow_input_downcast=True) +``` + + +```python +for i in xrange(10001): + err = train(D[0], 
D[1]) + if i % 1000 == 0: + print 'iter %5d, error %f' % (i, err) +``` + + iter 0, error 19.295896 + iter 1000, error 0.210341 + iter 2000, error 0.126124 + iter 3000, error 0.124872 + iter 4000, error 0.124846 + iter 5000, error 0.124845 + iter 6000, error 0.124845 + iter 7000, error 0.124845 + iter 8000, error 0.124845 + iter 9000, error 0.124845 + iter 10000, error 0.124845 + + +查看结果: + + +```python +print D[1] +``` + + [0 0 0 1 1 0 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 0 0 1 1 0 0 0 0 1 0 + 1 1 0 0 0 0 1 0 0 1 1 0 1 1 1 0 0 0 1 0 0 1 1 0 1 0 1 1 1 0 0 0 0 0 1 0 0 + 0 1 0 1 0 0 1 1 0 1 0 0 0 1 0 1 1 1 0 1 1 0 1 0 0 1 0 0 1 0 1 1 1 1 0 1 0 + 0 0 0 1 0 1 0 1 1 0 1 0 1 0 1 0 0 0 1 0 0 0 1 0 1 1 0 1 0 1 1 0 0 0 0 0 1 + 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 1 0 1 1 0 0 1 1 1 1 1 + 1 0 1 0 0 1 0 0 1 1 1 1 0 1 0 1 0 1 1 1 1 0 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 + 1 0 1 0 0 0 0 0 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 1 1 0 0 0 1 1 1 0 1 1 + 0 1 0 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 + 0 0 1 1 0 0 0 1 0 1 1 0 1 0 0 1 0 0 0 1 0 1 1 1 0 1 0 1 0 1 0 1 0 1 0 1 0 + 0 1 0 0 0 0 0 1 1 0 1 1 0 1 1 0 0 1 0 1 1 0 0 1 1 1 0 1 0 1 1 1 0 1 0 0 1 + 0 1 0 1 0 0 1 0 0 1 1 1 0 1 1 0 0 1 0 1 1 0 1 0 1 0 0 1 1 0] + + + +```python +print predict(D[0]) +``` + + [0 0 0 1 1 0 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 0 0 1 1 0 0 0 0 1 0 + 1 1 0 0 0 0 1 0 0 1 1 0 1 1 1 0 0 0 1 0 0 1 1 0 1 0 1 1 1 0 0 0 0 0 1 0 0 + 0 1 0 1 0 0 1 1 0 1 0 0 0 1 0 1 1 1 0 1 1 0 1 0 0 1 0 0 1 0 1 1 1 1 0 1 0 + 0 0 0 1 0 1 0 1 1 0 1 0 1 0 1 0 0 0 1 0 0 0 1 0 1 1 0 1 0 1 1 0 0 0 0 0 1 + 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 1 0 1 1 0 0 1 1 1 1 1 + 1 0 1 0 0 1 0 0 1 1 1 1 0 1 0 1 0 1 1 1 1 0 1 0 0 1 1 1 0 0 0 1 0 0 0 1 0 + 1 0 1 0 0 0 0 0 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 1 1 0 0 0 1 1 1 0 1 1 + 0 1 0 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 + 0 0 1 1 0 0 0 1 0 1 1 0 1 0 0 1 0 0 0 1 0 1 1 1 0 1 0 1 0 1 0 1 0 1 0 1 0 + 0 1 0 0 0 0 0 1 1 
0 1 1 0 1 1 0 0 1 0 1 1 0 0 1 1 1 0 1 0 1 1 1 0 1 0 0 1 + 0 1 0 1 0 0 1 0 0 1 1 1 0 1 1 0 0 1 0 1 1 0 1 0 1 0 0 1 1 0] + diff --git a/docs/09-theano/09.10-softmax-on-mnist.md b/docs/09-theano/09.10-softmax-on-mnist.md new file mode 100644 index 00000000..50e90a10 --- /dev/null +++ b/docs/09-theano/09.10-softmax-on-mnist.md @@ -0,0 +1,325 @@ + +# Theano 实例:Softmax 回归 + +## MNIST 数据集的下载和导入 + +[MNIST 数据集](http://yann.lecun.com/exdb/mnist/) 是一个手写数字组成的数据集,现在被当作一个机器学习算法评测的基准数据集。 + +这是一个下载并解压数据的脚本: + + +```python +%%file download_mnist.py +import os +import os.path +import urllib +import gzip +import shutil + +if not os.path.exists('mnist'): + os.mkdir('mnist') + +def download_and_gzip(name): + if not os.path.exists(name + '.gz'): + urllib.urlretrieve('http://yann.lecun.com/exdb/' + name + '.gz', name + '.gz') + if not os.path.exists(name): + with gzip.open(name + '.gz', 'rb') as f_in, open(name, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + +download_and_gzip('mnist/train-images-idx3-ubyte') +download_and_gzip('mnist/train-labels-idx1-ubyte') +download_and_gzip('mnist/t10k-images-idx3-ubyte') +download_and_gzip('mnist/t10k-labels-idx1-ubyte') +``` + + Overwriting download_mnist.py + + +可以运行这个脚本来下载和解压数据: + + +```python +%run download_mnist.py +``` + +使用如下的脚本来导入 MNIST 数据,源码地址: + +https://github.com/Newmu/Theano-Tutorials/blob/master/load.py + + +```python +%%file load.py +import numpy as np +import os + +datasets_dir = './' + +def one_hot(x,n): + if type(x) == list: + x = np.array(x) + x = x.flatten() + o_h = np.zeros((len(x),n)) + o_h[np.arange(len(x)),x] = 1 + return o_h + +def mnist(ntrain=60000,ntest=10000,onehot=True): + data_dir = os.path.join(datasets_dir,'mnist/') + fd = open(os.path.join(data_dir,'train-images-idx3-ubyte')) + loaded = np.fromfile(file=fd,dtype=np.uint8) + trX = loaded[16:].reshape((60000,28*28)).astype(float) + + fd = open(os.path.join(data_dir,'train-labels-idx1-ubyte')) + loaded = np.fromfile(file=fd,dtype=np.uint8) + trY = 
loaded[8:].reshape((60000)) + + fd = open(os.path.join(data_dir,'t10k-images-idx3-ubyte')) + loaded = np.fromfile(file=fd,dtype=np.uint8) + teX = loaded[16:].reshape((10000,28*28)).astype(float) + + fd = open(os.path.join(data_dir,'t10k-labels-idx1-ubyte')) + loaded = np.fromfile(file=fd,dtype=np.uint8) + teY = loaded[8:].reshape((10000)) + + trX = trX/255. + teX = teX/255. + + trX = trX[:ntrain] + trY = trY[:ntrain] + + teX = teX[:ntest] + teY = teY[:ntest] + + if onehot: + trY = one_hot(trY, 10) + teY = one_hot(teY, 10) + else: + trY = np.asarray(trY) + teY = np.asarray(teY) + + return trX,teX,trY,teY +``` + + Overwriting load.py + + +## softmax 回归 + +`Softmax` 回归相当于 `Logistic` 回归的一个一般化,`Logistic` 回归处理的是两类问题,`Softmax` 回归处理的是 `N` 类问题。 + +`Logistic` 回归输出的是标签为 1 的概率(标签为 0 的概率也就知道了),对应地,对 N 类问题 `Softmax` 输出的是每个类对应的概率。 + +具体的内容,可以参考 `UFLDL` 教程: + +http://ufldl.stanford.edu/wiki/index.php/Softmax%E5%9B%9E%E5%BD%92 + + +```python +import theano +from theano import tensor as T +import numpy as np +from load import mnist +``` + + Using gpu device 1: Tesla C2075 (CNMeM is disabled) + + +我们来看它具体的实现。 + +这两个函数一个是将数据转化为 `GPU` 计算的类型,另一个是初始化权重: + + +```python +def floatX(X): + return np.asarray(X, dtype=theano.config.floatX) + +def init_weights(shape): + return theano.shared(floatX(np.random.randn(*shape) * 0.01)) +``` + +`Softmax` 的模型在 `theano` 中已经实现好了: + + +```python +A = T.matrix() + +B = T.nnet.softmax(A) + +test_softmax = theano.function([A], B) + +a = floatX(np.random.rand(3, 4)) + +b = test_softmax(a) + +print b.shape + +# 行和 +print b.sum(1) +``` + + (3, 4) + [ 1.00000012 1. 1. 
] + + +`softmax` 函数会按照行对矩阵进行 `Softmax` 归一化。 + +所以我们的模型为: + + +```python +def model(X, w): + return T.nnet.softmax(T.dot(X, w)) +``` + +导入数据: + + +```python +trX, teX, trY, teY = mnist(onehot=True) +``` + +定义变量,并初始化权重: + + +```python +X = T.fmatrix() +Y = T.fmatrix() + +w = init_weights((784, 10)) +``` + +定义模型输出和预测: + + +```python +py_x = model(X, w) +y_pred = T.argmax(py_x, axis=1) +``` + +损失函数为多类的交叉熵,这个在 `theano` 中也被定义好了: + + +```python +cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y)) +gradient = T.grad(cost=cost, wrt=w) +update = [[w, w - gradient * 0.05]] +``` + +编译 `train` 和 `predict` 函数: + + +```python +train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True) +predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True) +``` + +迭代 100 次,测试集正确率为 0.925: + + +```python +for i in range(100): + for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)): + cost = train(trX[start:end], trY[start:end]) + print "{0:03d}".format(i), np.mean(np.argmax(teY, axis=1) == predict(teX)) +``` + + 000 0.8862 + 001 0.8985 + 002 0.9042 + 003 0.9084 + 004 0.9104 + 005 0.9121 + 006 0.9121 + 007 0.9142 + 008 0.9158 + 009 0.9163 + 010 0.9162 + 011 0.9166 + 012 0.9171 + 013 0.9176 + 014 0.9182 + 015 0.9182 + 016 0.9184 + 017 0.9188 + 018 0.919 + 019 0.919 + 020 0.9194 + 021 0.9201 + 022 0.9204 + 023 0.9203 + 024 0.9205 + 025 0.9207 + 026 0.9207 + 027 0.9209 + 028 0.9214 + 029 0.9213 + 030 0.9212 + 031 0.9211 + 032 0.9217 + 033 0.9217 + 034 0.9217 + 035 0.922 + 036 0.9222 + 037 0.922 + 038 0.922 + 039 0.9218 + 040 0.9219 + 041 0.9223 + 042 0.9225 + 043 0.9226 + 044 0.9227 + 045 0.9225 + 046 0.9227 + 047 0.9231 + 048 0.9231 + 049 0.9231 + 050 0.9232 + 051 0.9232 + 052 0.9231 + 053 0.9231 + 054 0.9233 + 055 0.9233 + 056 0.9237 + 057 0.9239 + 058 0.9239 + 059 0.9239 + 060 0.924 + 061 0.9242 + 062 0.9242 + 063 0.9243 + 064 0.9243 + 065 0.9244 + 066 0.9244 + 067 0.9244 + 068 0.9245 + 069 0.9244 + 070 0.9244 + 
071 0.9245 + 072 0.9244 + 073 0.9243 + 074 0.9243 + 075 0.9244 + 076 0.9243 + 077 0.9242 + 078 0.9244 + 079 0.9244 + 080 0.9243 + 081 0.9242 + 082 0.9239 + 083 0.9241 + 084 0.9242 + 085 0.9243 + 086 0.9244 + 087 0.9243 + 088 0.9243 + 089 0.9244 + 090 0.9246 + 091 0.9246 + 092 0.9246 + 093 0.9247 + 094 0.9246 + 095 0.9246 + 096 0.9246 + 097 0.9246 + 098 0.9246 + 099 0.9248 + diff --git a/docs/09-theano/09.11-net-on-mnist.md b/docs/09-theano/09.11-net-on-mnist.md new file mode 100644 index 00000000..11ec987d --- /dev/null +++ b/docs/09-theano/09.11-net-on-mnist.md @@ -0,0 +1,248 @@ + +# Theano 实例:人工神经网络 + +神经网络的模型可以参考 UFLDL 的教程,这里不做过多描述。 + +http://ufldl.stanford.edu/wiki/index.php/%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C + + +```python +import theano +import theano.tensor as T + +import numpy as np +from load import mnist +``` + + Using gpu device 1: Tesla K10.G2.8GB (CNMeM is disabled) + + +我们在这里使用一个简单的三层神经网络:输入 - 隐层 - 输出。 + +对于网络的激活函数,隐层用 `sigmoid` 函数,输出层用 `softmax` 函数,其模型如下: + +$$ +\begin{aligned} + h & = \sigma (W_h X) \\ + o & = \text{softmax} (W_o h) +\end{aligned} +$$ + + +```python +def model(X, w_h, w_o): + """ + input: + X: input data + w_h: hidden unit weights + w_o: output unit weights + output: + Y: probability of y given x + """ + # 隐层 + h = T.nnet.sigmoid(T.dot(X, w_h)) + # 输出层 + pyx = T.nnet.softmax(T.dot(h, w_o)) + return pyx +``` + +使用随机梯度下降的方法进行训练: + + +```python +def sgd(cost, params, lr=0.05): + """ + input: + cost: cost function + params: parameters + lr: learning rate + output: + update rules + """ + grads = T.grad(cost=cost, wrt=params) + updates = [] + for p, g in zip(params, grads): + updates.append([p, p - g * lr]) + return updates +``` + +对于 `MNIST` 手写数字的问题,我们使用一个 `784 × 625 × 10` 即输入层大小为 `784`,隐层大小为 `625`,输出层大小为 `10` 的神经网络来模拟,最后的输出表示数字为 `0` 到 `9` 的概率。 + +为了对权重进行更新,我们需要将权重设为 shared 变量: + + +```python +def floatX(X): + return np.asarray(X, dtype=theano.config.floatX) + +def init_weights(shape): + return 
theano.shared(floatX(np.random.randn(*shape) * 0.01)) +``` + +因此变量初始化为: + + +```python +X = T.matrix() +Y = T.matrix() + +w_h = init_weights((784, 625)) +w_o = init_weights((625, 10)) +``` + +模型输出为: + + +```python +py_x = model(X, w_h, w_o) +``` + +预测的结果为: + + +```python +y_x = T.argmax(py_x, axis=1) +``` + +模型的误差函数为: + + +```python +cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y)) +``` + +更新规则为: + + +```python +updates = sgd(cost, [w_h, w_o]) +``` + +定义训练和预测的函数: + + +```python +train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True) +predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True) +``` + +训练: + +导入 MNIST 数据: + + +```python +trX, teX, trY, teY = mnist(onehot=True) +``` + +训练 100 轮,正确率为 0.956: + + +```python +for i in range(100): + for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)): + cost = train(trX[start:end], trY[start:end]) + print "{0:03d}".format(i), np.mean(np.argmax(teY, axis=1) == predict(teX)) +``` + + 000 0.7028 + 001 0.8285 + 002 0.8673 + 003 0.883 + 004 0.89 + 005 0.895 + 006 0.8984 + 007 0.9017 + 008 0.9047 + 009 0.907 + 010 0.9089 + 011 0.9105 + 012 0.9127 + 013 0.914 + 014 0.9152 + 015 0.9159 + 016 0.9169 + 017 0.9173 + 018 0.918 + 019 0.9185 + 020 0.919 + 021 0.9197 + 022 0.9201 + 023 0.9205 + 024 0.9206 + 025 0.9212 + 026 0.9219 + 027 0.9228 + 028 0.9228 + 029 0.9229 + 030 0.9236 + 031 0.9244 + 032 0.925 + 033 0.9255 + 034 0.9263 + 035 0.927 + 036 0.9274 + 037 0.9278 + 038 0.928 + 039 0.9284 + 040 0.9289 + 041 0.9294 + 042 0.9298 + 043 0.9302 + 044 0.9311 + 045 0.932 + 046 0.9325 + 047 0.9332 + 048 0.934 + 049 0.9347 + 050 0.9354 + 051 0.9358 + 052 0.9365 + 053 0.9372 + 054 0.9377 + 055 0.9385 + 056 0.9395 + 057 0.9399 + 058 0.9405 + 059 0.9411 + 060 0.9416 + 061 0.9422 + 062 0.9427 + 063 0.9429 + 064 0.9431 + 065 0.9438 + 066 0.9444 + 067 0.9446 + 068 0.9449 + 069 0.9453 + 070 0.9458 + 071 0.9462 + 072 0.9469 + 073 0.9475 + 074 0.9474 + 075 0.9476 
+ 076 0.948 + 077 0.949 + 078 0.9497 + 079 0.95 + 080 0.9503 + 081 0.9507 + 082 0.9507 + 083 0.9515 + 084 0.9519 + 085 0.9521 + 086 0.9523 + 087 0.9529 + 088 0.9536 + 089 0.9538 + 090 0.9542 + 091 0.9545 + 092 0.9544 + 093 0.9546 + 094 0.9547 + 095 0.9549 + 096 0.9552 + 097 0.9554 + 098 0.9557 + 099 0.9562 + diff --git a/docs/09-theano/09.12-random-streams.md b/docs/09-theano/09.12-random-streams.md new file mode 100644 index 00000000..ca2b6a07 --- /dev/null +++ b/docs/09-theano/09.12-random-streams.md @@ -0,0 +1,60 @@ + +# Theano 随机数流变量 + + +```python +import theano +import theano.tensor as T +import numpy as np +``` + + Using gpu device 1: Tesla C2075 (CNMeM is disabled) + + +`Theano` 的随机数变量由 `theano.sandbox.rng_mrg` 中的 `MRG_RandomStreams` 实现(`sandbox` 表示是实验代码): + + +```python +from theano.sandbox.rng_mrg import MRG_RandomStreams +``` + +新建一个 `MRG_RandomStreams(seed=12345, use_cuda=None)` 实例: + + +```python +srng = MRG_RandomStreams() +``` + +它支持以下方法: + +- `normal(size, avg=0.0, std=1.0, ndim=None, dtype=None, nstreams=None)` + - 产生指定形状的、服从正态分布 $N(avg, std)$ 的随机数变量,默认为标准正态分布 +- `uniform(size, low=0.0, high=1.0, ndim=None, dtype=None, nstreams=None)` + - 产生指定形状的、服从均匀分布 $U(low, high)$ 的随机数变量,默认为 0-1 之间的均匀分布 +- `binomial(size=None, n=1, p=0.5, ndim=None, dtype='int64', nstreams=None)` + - 产生指定形状的、服从二项分布 $B(n,p)$ 的随机数变量 +- `multinomial(size=None, n=1, pvals=None, ndim=None, dtype='int64', nstreams=None)` + - 产生指定形状的、服从多项分布的随机数变量 + +与 np.random.random 不同,它产生的是随机数变量,而不是随机数数组,因此可以将 `size` 作为参数传给它: + + +```python +rand_size = T.vector(dtype="int64") + +rand_normal = srng.normal(rand_size.shape) +rand_uniform = srng.uniform(rand_size.shape) +rand_binomial = srng.binomial(rand_size.shape) + +f_rand = theano.function(inputs = [rand_size], + outputs = [rand_normal, rand_uniform, rand_binomial]) + +print f_rand(range(5))[0] +print f_rand(range(5))[1] +print f_rand(range(5))[2] +``` + + [ 0.10108768 -1.64354193 0.71042836 -0.77760422 0.06291872] + [ 0.23193923 0.71880513 
0.03122572 0.97318739 0.99260223] + [0 1 0 1 1] + diff --git a/docs/09-theano/09.13-modern-net-on-mnist.md b/docs/09-theano/09.13-modern-net-on-mnist.md new file mode 100644 index 00000000..ba43f6eb --- /dev/null +++ b/docs/09-theano/09.13-modern-net-on-mnist.md @@ -0,0 +1,218 @@ + +# Theano 实例:更复杂的网络 + + +```python +import theano +import theano.tensor as T +import numpy as np +from load import mnist +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams + +srng = RandomStreams() + +def floatX(X): + return np.asarray(X, dtype=theano.config.floatX) +``` + + Using gpu device 1: Tesla C2075 (CNMeM is disabled) + + +上一节我们用了一个简单的神经网络来训练 MNIST 数据,这次我们使用更复杂的网络来进行训练,同时加入 `dropout` 机制,防止过拟合。 + +这里采用比较简单的 `dropout` 机制,即将输入值按照一定的概率随机置零。 + + +```python +def dropout(X, prob=0.): + if prob > 0: + X *= srng.binomial(X.shape, p=1-prob, dtype = theano.config.floatX) + X /= 1 - prob + return X +``` + +之前我们采用的激活函数是 `sigmoid`,现在我们使用 `rectify` 激活函数。 + +这可以使用 `T.nnet.relu(x, alpha=0)` 来实现,它本质上相当于:`T.switch(x > 0, x, alpha * x)`,而 `rectify` 函数的定义为: + +$$ +\text{rectify}(x) = \left\{ +\begin{aligned} +x, & \ x > 0 \\ +0, & \ x \le 0 +\end{aligned}\right. 
+$$ + +之前我们构造的是一个单隐层的神经网络结构,现在我们构造一个双隐层的结构即“输入-隐层1-隐层2-输出”的全连接结构。 + +$$ +\begin{aligned} +& h_1 = \text{rectify}(W_{h_1} \ x) \\ +& h_2 = \text{rectify}(W_{h_2} \ h_1) \\ +& o = \text{softmax}(W_o h_2) +\end{aligned} +$$ + +`Theano` 自带的 `T.nnet.softmax()` 的 GPU 实现目前似乎有 bug 会导致梯度溢出的问题,因此自定义了 `softmax` 函数: + + +```python +def softmax(X): + e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x')) + return e_x / e_x.sum(axis=1).dimshuffle(0, 'x') + +def model(X, w_h1, w_h2, w_o, p_drop_input, p_drop_hidden): + """ + input: + X: input data + w_h1: weights input layer to hidden layer 1 + w_h2: weights hidden layer 1 to hidden layer 2 + w_o: weights hidden layer 2 to output layer + p_drop_input: dropout rate for input layer + p_drop_hidden: dropout rate for hidden layer + output: + h1: hidden layer 1 + h2: hidden layer 2 + py_x: output layer + """ + X = dropout(X, p_drop_input) + h1 = T.nnet.relu(T.dot(X, w_h1)) + + h1 = dropout(h1, p_drop_hidden) + h2 = T.nnet.relu(T.dot(h1, w_h2)) + + h2 = dropout(h2, p_drop_hidden) + py_x = softmax(T.dot(h2, w_o)) + return h1, h2, py_x +``` + +随机初始化权重矩阵: + + +```python +def init_weights(shape): + return theano.shared(floatX(np.random.randn(*shape) * 0.01)) + +w_h1 = init_weights((784, 625)) +w_h2 = init_weights((625, 625)) +w_o = init_weights((625, 10)) +``` + +定义变量: + + +```python +X = T.matrix() +Y = T.matrix() +``` + +定义更新的规则,之前我们使用的是简单的 SGD,这次我们使用 RMSprop 来更新,其规则为: +$$ +\begin{align} +MS(w, t) & = \rho MS(w, t-1) + (1-\rho) \left(\left.\frac{\partial E}{\partial w}\right|_{w(t-1)}\right)^2 \\ +w(t) & = w(t-1) - \alpha \left.\frac{\partial E}{\partial w}\right|_{w(t-1)} / \sqrt{MS(w, t)} +\end{align} +$$ + + +```python +def RMSprop(cost, params, accs, lr=0.001, rho=0.9, epsilon=1e-6): + grads = T.grad(cost=cost, wrt=params) + updates = [] + for p, g, acc in zip(params, grads, accs): + acc_new = rho * acc + (1 - rho) * g ** 2 + gradient_scaling = T.sqrt(acc_new + epsilon) + g = g / gradient_scaling + updates.append((acc, acc_new)) + 
updates.append((p, p - lr * g)) + return updates +``` + +训练函数: + + +```python +# 有 dropout,用来训练 +noise_h1, noise_h2, noise_py_x = model(X, w_h1, w_h2, w_o, 0.2, 0.5) +cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y)) +params = [w_h1, w_h2, w_o] +accs = [theano.shared(p.get_value() * 0.) for p in params] +updates = RMSprop(cost, params, accs, lr=0.001) +# 训练函数 +train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True) +``` + +预测函数: + + +```python +# 没有 dropout,用来预测 +h1, h2, py_x = model(X, w_h1, w_h2, w_o, 0., 0.) +# 预测的结果 +y_x = T.argmax(py_x, axis=1) +predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True) +``` + +训练: + + +```python +trX, teX, trY, teY = mnist(onehot=True) + +for i in range(50): + for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)): + cost = train(trX[start:end], trY[start:end]) + print "iter {:03d} accuracy:".format(i + 1), np.mean(np.argmax(teY, axis=1) == predict(teX)) +``` + + iter 001 accuracy: 0.943 + iter 002 accuracy: 0.9665 + iter 003 accuracy: 0.9732 + iter 004 accuracy: 0.9763 + iter 005 accuracy: 0.9767 + iter 006 accuracy: 0.9802 + iter 007 accuracy: 0.9795 + iter 008 accuracy: 0.979 + iter 009 accuracy: 0.9807 + iter 010 accuracy: 0.9805 + iter 011 accuracy: 0.9824 + iter 012 accuracy: 0.9816 + iter 013 accuracy: 0.9838 + iter 014 accuracy: 0.9846 + iter 015 accuracy: 0.983 + iter 016 accuracy: 0.9837 + iter 017 accuracy: 0.9841 + iter 018 accuracy: 0.9837 + iter 019 accuracy: 0.9835 + iter 020 accuracy: 0.9844 + iter 021 accuracy: 0.9837 + iter 022 accuracy: 0.9839 + iter 023 accuracy: 0.984 + iter 024 accuracy: 0.9851 + iter 025 accuracy: 0.985 + iter 026 accuracy: 0.9847 + iter 027 accuracy: 0.9851 + iter 028 accuracy: 0.9846 + iter 029 accuracy: 0.9846 + iter 030 accuracy: 0.9853 + iter 031 accuracy: 0.985 + iter 032 accuracy: 0.9844 + iter 033 accuracy: 0.9849 + iter 034 accuracy: 0.9845 + iter 035 accuracy: 0.9848 + iter 036 accuracy: 
0.9868 + iter 037 accuracy: 0.9864 + iter 038 accuracy: 0.9866 + iter 039 accuracy: 0.9859 + iter 040 accuracy: 0.9857 + iter 041 accuracy: 0.9853 + iter 042 accuracy: 0.9855 + iter 043 accuracy: 0.9861 + iter 044 accuracy: 0.9865 + iter 045 accuracy: 0.9872 + iter 046 accuracy: 0.9867 + iter 047 accuracy: 0.9868 + iter 048 accuracy: 0.9863 + iter 049 accuracy: 0.9862 + iter 050 accuracy: 0.9856 + diff --git a/docs/09-theano/09.14-convolutional-net-on-mnist.md b/docs/09-theano/09.14-convolutional-net-on-mnist.md new file mode 100644 index 00000000..fff8eb48 --- /dev/null +++ b/docs/09-theano/09.14-convolutional-net-on-mnist.md @@ -0,0 +1,239 @@ + +# Theano 实例:卷积神经网络 + + +```python +import theano +import theano.tensor as T +from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams +import numpy as np +from load import mnist + +srng = RandomStreams() +``` + + Using gpu device 1: Tesla C2075 (CNMeM is disabled) + + +从前一节导入有用的函数: + + +```python +def floatX(X): + return np.asarray(X, dtype=theano.config.floatX) + +def init_weights(shape): + return theano.shared(floatX(np.random.randn(*shape) * 0.01)) + +def rectify(X): + return T.maximum(X, 0.) + +def softmax(X): + e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x')) + return e_x / e_x.sum(axis=1).dimshuffle(0, 'x') + +def dropout(X, p=0.): + if p > 0: + retain_prob = 1 - p + X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX) + X /= retain_prob + return X + +def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6): + grads = T.grad(cost=cost, wrt=params) + updates = [] + for p, g in zip(params, grads): + acc = theano.shared(p.get_value() * 0.) 
+ acc_new = rho * acc + (1 - rho) * g ** 2 + gradient_scaling = T.sqrt(acc_new + epsilon) + g = g / gradient_scaling + updates.append((acc, acc_new)) + updates.append((p, p - lr * g)) + return updates +``` + +与前一节不同,我们使用卷积神经网络来实现这次的模型,为此,我们需要导入 2 维的卷积和池化函数: + + +```python +from theano.tensor.nnet.conv import conv2d +from theano.tensor.signal.downsample import max_pool_2d +``` + +`conv2d` 函数接受两个输入: + +- 对应输入的 `4D` 张量,其形状如下: + + `[mini-batch size, number of feature maps at layer m-1, image height, image width]` + + +- 对应参数矩阵的 `4D` 张量,其形状如下: + + `[number of feature maps at layer m, number of feature maps at layer m-1, filter height, filter width]` + +为了对图像使用卷积,我们需要将图像转化为原始的 `28 × 28` 大小,同时添加一维表示图像的通道数(黑白图像为 1): + + +```python +trX, teX, trY, teY = mnist(onehot=True) + +trX = trX.reshape(-1, 1, 28, 28) +teX = teX.reshape(-1, 1, 28, 28) +``` + +注意,对于 `reshape` 方法,传入的参数是 `-1` 表示该维的维度将根据其他参数自动计算。 + +模型首先进行三层卷积加池化操作,然后在第三层的输出中加一个全连结层,最后在第四层加上一个 `softmax` 层: + + +```python +def model(X, w, w2, w3, w4, p_drop_conv, p_drop_hidden): + + # X: 128 * 1 * 28 * 28 + # w: 32 * 1 * 3 * 3 + # full mode + # l1a: 128 * 32 * (28 + 3 - 1) * (28 + 3 - 1) + l1a = rectify(conv2d(X, w, border_mode='full')) + # l1a: 128 * 32 * 30 * 30 + # ignore_border False + # l1: 128 * 32 * (30 / 2) * (30 / 2) + l1 = max_pool_2d(l1a, (2, 2), ignore_border=False) + l1 = dropout(l1, p_drop_conv) + + # l1: 128 * 32 * 15 * 15 + # w2: 64 * 32 * 3 * 3 + # valid mode + # l2a: 128 * 64 * (15 - 3 + 1) * (15 - 3 + 1) + l2a = rectify(conv2d(l1, w2)) + # l2a: 128 * 64 * 13 * 13 + # l2: 128 * 64 * (13 / 2 + 1) * (13 / 2 + 1) + l2 = max_pool_2d(l2a, (2, 2), ignore_border=False) + l2 = dropout(l2, p_drop_conv) + + # l2: 128 * 64 * 7 * 7 + # w3: 128 * 64 * 3 * 3 + # l3a: 128 * 128 * (7 - 3 + 1) * (7 - 3 + 1) + l3a = rectify(conv2d(l2, w3)) + # l3a: 128 * 128 * 5 * 5 + # l3b: 128 * 128 * (5 / 2 + 1) * (5 / 2 + 1) + l3b = max_pool_2d(l3a, (2, 2), ignore_border=False) + # l3b: 128 * 128 * 3 * 3 + # l3: 128 * (128 * 3 * 3) + 
l3 = T.flatten(l3b, outdim=2) + l3 = dropout(l3, p_drop_conv) + + # l3: 128 * (128 * 3 * 3) + # w4: (128 * 3 * 3) * 625 + # l4: 128 * 625 + l4 = rectify(T.dot(l3, w4)) + l4 = dropout(l4, p_drop_hidden) + + # l5: 128 * 625 + # w5: 625 * 10 + # pyx: 128 * 10 + pyx = softmax(T.dot(l4, w_o)) + return l1, l2, l3, l4, pyx +``` + +定义符号变量: + + +```python +X = T.ftensor4() +Y = T.fmatrix() + +w = init_weights((32, 1, 3, 3)) +w2 = init_weights((64, 32, 3, 3)) +w3 = init_weights((128, 64, 3, 3)) +w4 = init_weights((128 * 3 * 3, 625)) +w_o = init_weights((625, 10)) +``` + +使用带 `dropout` 的模型进行训练: + + +```python +noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, w, w2, w3, w4, 0.2, 0.5) +``` + +使用不带 `dropout` 的模型进行预测: + + +```python +l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, 0., 0.) +y_x = T.argmax(py_x, axis=1) +``` + +定义损失函数和迭代规则: + + +```python +cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y)) +params = [w, w2, w3, w4, w_o] +updates = RMSprop(cost, params, lr=0.001) +``` + +开始训练: + + +```python +train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True) +predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True) + +for i in range(50): + for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)): + cost = train(trX[start:end], trY[start:end]) + print "iter {:03d}, {:.3f}".format(i + 1, np.mean(np.argmax(teY, axis=1) == predict(teX))) +``` + + iter 001, 0.917 + iter 002, 0.974 + iter 003, 0.983 + iter 004, 0.984 + iter 005, 0.987 + iter 006, 0.989 + iter 007, 0.991 + iter 008, 0.993 + iter 009, 0.991 + iter 010, 0.992 + iter 011, 0.993 + iter 012, 0.992 + iter 013, 0.992 + iter 014, 0.992 + iter 015, 0.993 + iter 016, 0.992 + iter 017, 0.994 + iter 018, 0.993 + iter 019, 0.993 + iter 020, 0.994 + iter 021, 0.993 + iter 022, 0.993 + iter 023, 0.993 + iter 024, 0.992 + iter 025, 0.994 + iter 026, 0.993 + iter 027, 0.994 + iter 028, 0.993 + iter 029, 0.993 + iter 030, 0.994 + iter 
031, 0.994 + iter 032, 0.993 + iter 033, 0.994 + iter 034, 0.994 + iter 035, 0.994 + iter 036, 0.994 + iter 037, 0.994 + iter 038, 0.993 + iter 039, 0.994 + iter 040, 0.994 + iter 041, 0.994 + iter 042, 0.994 + iter 043, 0.995 + iter 044, 0.994 + iter 045, 0.994 + iter 046, 0.994 + iter 047, 0.995 + iter 048, 0.994 + iter 049, 0.994 + iter 050, 0.995 + diff --git a/docs/09-theano/09.15-tensor-basics.md b/docs/09-theano/09.15-tensor-basics.md new file mode 100644 index 00000000..cfab238d --- /dev/null +++ b/docs/09-theano/09.15-tensor-basics.md @@ -0,0 +1,307 @@ + +# Theano tensor 模块:基础 + +张量是向量在数学上的一种推广,具体内容可以参考维基百科: +https://en.wikipedia.org/wiki/Tensor + +在 Theano 中有一个专门处理张量变量的模块:`theano.tensor` (以下简称 `T`)。 + + +```python +import theano +import theano.tensor as T +``` + + Using gpu device 1: Tesla C2075 (CNMeM is disabled) + + +## 构造符号变量 + +可以用 `tensor` 模块创造符号变量: + + +```python +x = T.fmatrix() + +print type(x) +print type(T.fmatrix) +``` + + + + + +从上面可以看到,`T.fmatrix()` 创造出的是一个 `TensorVariable` 类,而 `T.fmatrix` 本身是一个 `TensorType` 类。 + +除了使用 `fmatrix`,我们还可以通过指定 `matrix` 的 `dtype` 参数来定义,例如下面的三种方式都是产生一个 `int32` 型的标量: + + +```python +x = T.scalar('myvar', dtype='int32') +x = T.iscalar('myvar') +x = T.TensorType(dtype='int32', broadcastable=())('myvar') +``` + +常用的构造函数有: + +- `T.scalar(name=None, dtype=config.floatX)` +- `T.vector(name=None, dtype=config.floatX)` +- `T.row(name=None, dtype=config.floatX)` +- `T.col(name=None, dtype=config.floatX)` +- `T.matrix(name=None, dtype=config.floatX)` +- `T.tensor3(name=None, dtype=config.floatX)` +- `T.tensor4(name=None, dtype=config.floatX)` + +还可以使用一个构造多个变量: +- `T.scalars` +- `T.vectors` +- `T.rows` +- `T.cols` +- `T.matrices` + +除此之外,我们还可以用 `TensorType` 类自定义的符号变量: + +`T.TensorType(dtype, broadcastable, name=None)` + +- `dtype: str`:对应于 `numpy` 中的类型 +- `broadcastable: tuple, list, or array of boolean values`:如果是 `True` 表示该维的维度只能为 1;长度表示符号变量的维度。 + +|pattern|interpretation| +|---|---| +| [] | scalar | +| [True] | 1D scalar 
(vector of length 1) | +| [True, True] | 2D scalar (1x1 matrix) | +| [False] | vector | +| [False, False] | matrix | +| [False] * n | nD tensor | +| [True, False] | row (1xN matrix) | +| [False, True] | column (Mx1 matrix) | +| [False, True, False] | A Mx1xP tensor (a) | +| [True, False, False] | A 1xNxP tensor (b) | +| [False, False, False] | A MxNxP tensor (pattern of a + b) | + +产生一个五维的变量类型: + + +```python +dtensor5 = T.TensorType('float64', (False,)*5) + +x = dtensor5() +``` + +## 变量方法 + +### .dim + +维度: + + +```python +print x.ndim +``` + + 5 + + +### .type + +类型: + + +```python +print x.type +``` + + TensorType(float64, 5D) + + +### .dtype + +包含的变量类型: + + +```python +print x.dtype +``` + + float64 + + +### .reshape + +传入一个变量对 x 进行 `reshape`,通常需要指定 `shape` 的 `ndim`: + + +```python +shape = T.ivector("shape") + +y = x.reshape(shape, ndim=3) +``` + +`y` 是 `x` 的一个 `view`: + + +```python +print x.ndim, y.ndim +``` + + 5 3 + + +### .dimshuffle + +`dimshuffle` 改变维度的顺序,返回原始变量的一个 `view`: + +输入是一个包含 `0,1,...,ndim-1` 和任意数目的 `'x'` 的组合: + +例如: + +- `('x')`:将标量变成 1 维数组 +- `(0, 1)`:与原始的 2 维数组相同 +- `(1, 0)`:交换 2 维数组的两个维度,形状从 `N × M` 变 `M × N` +- `('x', 0)`:形状从 `N` 变成 `1 × N` +- `(0, 'x')`:形状从 `N` 变成 `N × 1` +- `(2, 0, 1)`: 形状从 `A × B × C` 变成 `C × A × B` +- `(0, 'x', 1)`: 形状从 `A × B` 变成 `A × 1 × B` +- `(1, 'x', 0)`: 形状从 `A × B` 变成 `B × 1 × A` +- `(1,)`: 将第 0 维除去,除去的维度的大小必须为 1。形状从 `1 × A` 变成 `A` + + +```python +z = y.dimshuffle(("x", 1, 2, 0)) + +print z +print z.ndim +``` + + DimShuffle{x,1,2,0}.0 + 4 + + +### .flatten + +`flatten(ndim=1)` 返回原始变量的一个 `view`,将变量降为 `ndim` 维: + + +```python +z = x.flatten(ndim=2) + +print z.ndim +``` + + 2 + + +### .ravel + +与 `flatten` 一样。 + +### .T + +转置,注意,一维数组或者变量的转置是其本身,要想将行列向量互相转换,需要使用 `reshape` 或者 `dimshuffle`。 + +### 其他方法 + + +```python +print filter(lambda t: t.isalpha(), dir(x)) +``` + + ['T', 'all', 'any', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'argmax', 'argmin', 'argsort', 'astype', 'broadcastable', 'ceil', 
'choose', 'clip', 'clone', 'compress', 'conj', 'conjugate', 'copy', 'cos', 'cosh', 'cumprod', 'cumsum', 'diagonal', 'dimshuffle', 'dot', 'dtype', 'eval', 'exp', 'fill', 'flatten', 'floor', 'imag', 'index', 'log', 'max', 'mean', 'min', 'name', 'ndim', 'nonzero', 'norm', 'owner', 'prod', 'ptp', 'ravel', 'real', 'repeat', 'reshape', 'round', 'shape', 'sin', 'sinh', 'size', 'sort', 'sqrt', 'squeeze', 'std', 'sum', 'swapaxes', 'tag', 'take', 'tan', 'tanh', 'trace', 'transpose', 'trunc', 'type', 'var'] + + +## 模块函数 + +为了与 `numpy` 兼容,`tensor` + +### T.shape + +`shape(x)` 返回一个存储变量 `x` 形状的变量: + + +```python +print T.shape(x) +``` + + Shape.0 + + +### T.shape_padleft, T.shape_padright + +在最左边/右边加上 n 个大小为 1 的 1 个维度: + + +```python +x = T.tensor3() + +print T.shape_padleft(x) +print T.shape_padright(x) +``` + + DimShuffle{x,0,1,2}.0 + DimShuffle{0,1,2,x}.0 + + +### T.shape_padaxis + +在指定位置插入大小为 1 的 1 个维度: + + +```python +print T.shape_padaxis(x, 1) +print T.shape_padaxis(x, 0) +print T.shape_padaxis(x, -1) +``` + + DimShuffle{0,x,1,2}.0 + DimShuffle{x,0,1,2}.0 + DimShuffle{0,1,2,x}.0 + + +插入这些大小为 `1` 的维度,主要目的是 `broadcast` 化。 + +### T.unbroadcast + +可以使用 `unbroadcast(x, *axes)` 使得 `x` 的某些维度不可 `broadcast`。 + +### T.tile + +`tile(x, reps)` 按照规则重复 `x` + +## 产生张量 + +### T.zeros_like(x), T.ones_like(x) + +产生一个与 x 形状相同的全 0 或全 1 变量 + +### T.fill(a, b) + +使用 `b` 的值去填充 `a`,`b` 是一个数值或者 `theano scalar`。 + +### T.alloc(value, *shape) + +返回指定形状的变量,并初始化为 `value` + +### T.eye(n, m=None, k=0, dtype=theano.config.floatX) + +单位矩阵 + +### T.basic.choose(a, choices) + +`a` 是一个 `index` 数组变量,对应于 `choices` 中的位置。 + +## 降维 + +### T.max(x), T.argmax(x), T.max_and_argmax(x) + +最大值,最大值位置,最大值和最大值位置。 + +### T.min(x), T.argmin(x) + +最小值,最小值位置。 + +### T.sum(x), T.prod(x), T.mean(x), T.var(x), T.std(x) + +和,积,均值,方差,标准差 + +### T.all(x), T.any(x) diff --git a/docs/09-theano/09.16-tensor-indexing.md b/docs/09-theano/09.16-tensor-indexing.md new file mode 100644 index 00000000..efa63523 --- /dev/null +++ 
b/docs/09-theano/09.16-tensor-indexing.md @@ -0,0 +1,112 @@ + +# Theano tensor 模块:索引 + + +```python +import theano +import theano.tensor as T +import numpy as np +``` + + Using gpu device 1: Tesla C2075 (CNMeM is disabled) + + +## 简单索引 + +`tensor` 模块完全支持 `numpy` 中的简单索引: + + +```python +t = T.arange(9) + +print t[1::2].eval() +``` + + [1 3 5 7] + + +`numpy` 结果: + + +```python +n = np.arange(9) + +print n[1::2] +``` + + [1 3 5 7] + + +## mask 索引 + +`tensor` 模块虽然支持简单索引,但并不支持 `mask` 索引,例如这样的做法是错误的: + + +```python +t = T.arange(9).reshape((3,3)) + +print t[t > 4].eval() +``` + + [[[0 1 2] + [0 1 2] + [0 1 2]] + + [[0 1 2] + [0 1 2] + [3 4 5]] + + [[3 4 5] + [3 4 5] + [3 4 5]]] + + +`numpy` 中的结果: + + +```python +n = np.arange(9).reshape((3,3)) + +print n[n > 4] +``` + + [5 6 7 8] + + +要想像 `numpy` 一样得到正确结果,我们需要使用这样的方法: + + +```python +print t[(t > 4).nonzero()].eval() +``` + + [5 6 7 8] + + +## 使用索引进行赋值 + +`tensor` 模块不支持直接使用索引赋值,例如 `a[5] = b, a[5]+=b` 等是不允许的。 + +不过可以考虑用 `set_subtensor` 和 `inc_subtensor` 来实现类似的功能: + +### T.set_subtensor(x, y) + +实现类似 r[10:] = 5 的功能: + + +```python +r = T.vector() + +new_r = T.set_subtensor(r[10:], 5) +``` + +### T.inc_subtensor(x, y) + +实现类似 r[10:] += 5 的功能: + + +```python +r = T.vector() + +new_r = T.inc_subtensor(r[10:], 5) +``` diff --git a/docs/09-theano/09.17-tensor-operator-and-elementwise-operations.md b/docs/09-theano/09.17-tensor-operator-and-elementwise-operations.md new file mode 100644 index 00000000..1e37df70 --- /dev/null +++ b/docs/09-theano/09.17-tensor-operator-and-elementwise-operations.md @@ -0,0 +1,261 @@ + +# Theano tensor 模块:操作符和逐元素操作 + +## 操作符 + + +```python +import theano +from theano import tensor as T +``` + + Using gpu device 1: Tesla C2075 (CNMeM is disabled) + + +`tensor` 类型支持很多基本的操作: + + +```python +# 两个整形三维张量 + +a, b = T.itensor3("a"), T.itensor3("b") +``` + +### 算术操作 + + +```python +print theano.pp(a + 3) # T.add(a, 3) -> itensor3 +print theano.pp(3 - a) # T.sub(3, a) +print theano.pp(a * 3.5) # T.mul(a, 
3.5) -> ftensor3 or dtensor3 (depending on casting) +print theano.pp(2.2 / a) # T.truediv(2.2, a) +print theano.pp(2.2 // a) # T.intdiv(2.2, a) +print theano.pp(2.2**a) # T.pow(2.2, a) +print theano.pp(b % a) # T.mod(b, a) +``` + + (a + TensorConstant{3}) + (TensorConstant{3} - a) + (a * TensorConstant{3.5}) + (TensorConstant{2.20000004768} / a) + (TensorConstant{2.20000004768} // a) + (TensorConstant{2.20000004768} ** a) + mod(b, a) + + +### 比特操作 + + +```python +print theano.pp(a & b) # T.and_(a,b) bitwise and (alias T.bitwise_and) +print theano.pp(a ^ 1) # T.xor(a,1) bitwise xor (alias T.bitwise_xor) +print theano.pp(a | b) # T.or_(a,b) bitwise or (alias T.bitwise_or) +print theano.pp(~a) # T.invert(a) bitwise invert (alias T.bitwise_not) +``` + + and_(a, b) + xor(a, TensorConstant{1}) + or_(a, b) + invert(a) + + +### 原地操作 + +`Theano` 不支持原地操作如 `+=` 等,`Theano` 的图优化解构会自动决定是否使用原地操作。如果需要更新变量的值,可以考虑使用共享变量 `theano.shared`。 + +## 逐元素操作 + +### 类型转换 + +`T.cast(x, dtype)` 用于类型转换: + + +```python +x = T.matrix() +x_as_int = T.cast(x, 'int32') +``` + +`T.cast(x, dtype)` 的机制与 `numpy.asarray(x, dtype)` 的机制类似,只有 `dtype` 不同时才会创建新的变量: + + +```python +print x_as_int is x +print T.cast(x, theano.config.floatX) is x +``` + + False + True + + +复数取实部,虚部,角度,模: + +- `T.real(a)` +- `T.imag(a)` +- `T.angle(a)` +- `T.abs_(a)` + +### 比较 + +`Theano` 的比较操作也是逐元素的: + +- `T.lt(a, b)` : < +- `T.gt(a, b)` : > +- `T.le(a, b)` : <= +- `T.ge(a, b)` : >= +- `T.eq(a, b)` : == +- `T.neq(a, b)` : != + +`Theano` 中没有 `bool` 类型,所有的 `bool` 类型都用 `int8` 表示。 + + +```python +x, y = T.dmatrices('x','y') + +print theano.pp(T.le(x, y)) +``` + + le(x, y) + + +除此之外,还有另一些与 `numpy` 类似的用法: + +- `T.isnan(a)` : 是否 NAN +- `T.isinf(a)` : 是否 INF +- `T.isclose(a, b)` :浮点数是否接近 +- `T.allclose(a, b)` :浮点数是否很接近 + +### 条件 + +`T.switch(cond, ift, iff)` 选择 `ift (if true)` 和 `iff (if false)`。 + +`T.where(cond, ift, iff)` 与 `switch` 一致。 + +`T.clip(x, min, max)` 低于 `min` 的部分变成 `min`,超过 `max` 的部分变成 `max`。 + +### 数学操作 + + +```python +a, b
= T.matrices("a", "b") + + +print theano.pp(T.maximum(a, b)) # max(a, b) +print theano.pp(T.minimum(a, b)) # min(a, b) + +print theano.pp(T.neg(a)) # -a +print theano.pp(T.inv(a)) # 1.0/a + +print theano.pp(T.exp(a)) +print theano.pp(T.log(a)), theano.pp(T.log2(a)), theano.pp(T.log10(a)) # log10(a) + +print theano.pp(T.sgn(a)) # sgn(a) +print theano.pp(T.floor(a)) # floor(a) +print theano.pp(T.ceil(a)) # ceil(a) +print theano.pp(T.round(a)) # round(a) +print theano.pp(T.iround(a)) # iround(a) + +print theano.pp(T.sqr(a)) # sqr(a) +print theano.pp(T.sqrt(a)) # sqrt(a) + +print theano.pp(T.cos(a)), theano.pp(T.sin(a)), theano.pp(T.tan(a)) +print theano.pp(T.cosh(a)), theano.pp(T.sinh(a)), theano.pp(T.tanh(a)) # tan(a) + +print theano.pp(T.erf(a)), theano.pp(T.erfc(a)) # erf(a), erfc(a) +print theano.pp(T.erfinv(a)), theano.pp(T.erfcinv(a)) + +print theano.pp(T.gamma(a)) # gamma(a) +print theano.pp(T.gammaln(a)) # log(gamma(a)) +print theano.pp(T.psi(a)) # digamma(a) +``` + + maximum(a, b) + minimum(a, b) + (-a) + inv(a) + exp(a) + log(a) log2(a) log10(a) + sgn(a) + floor(a) + ceil(a) + round_half_away_from_zero(a) + int64(round_half_away_from_zero(a)) + sqr(a) + sqrt(a) + cos(a) sin(a) tan(a) + cosh(a) sinh(a) tanh(a) + erf(a) erfc(a) + erfinv(a) erfcinv(a) + gamma(a) + gammaln(a) + psi(a) + + +其中 `erf, erfc` 定义如下: +https://en.wikipedia.org/wiki/Error_function + +$$ +\operatorname{erf}(x) = \frac{2}{\sqrt\pi} \int_0^x e^{-t^2} dt +$$ +$$ +\begin{align} + \operatorname{erfc}(x) & = 1-\operatorname{erf}(x) \\ + & = \frac{2}{\sqrt\pi} \int_x^{\infty} e^{-t^2}\,\mathrm dt \\ + & = e^{-x^2} \operatorname{erfcx}(x) +\end{align} +$$ + +`erfinv, erfcinv` 为其反函数:1 +https://en.wikipedia.org/wiki/Error_function#Inverse_functions + +### Broadcasting + +![](http://deeplearning.net/software/theano/_images/bcast.png) + +图示如上。 + +## 线性代数 + +矩阵乘法:`T.dot(x, y)` + +向量外积:`T.outer(x, y)` + +张量乘法:`tensordot(a, b, axes=2)` + +`axes` 参数表示 `a` `b` 对应要去掉的维度。 + + +```python +import numpy as np 
+ +a = np.random.random((2,3,4)) +b = np.random.random((5,6,4,3)) + +#tensordot +c = np.tensordot(a, b, [[1,2],[3,2]]) + + +#loop replicating tensordot +a0, a1, a2 = a.shape +b0, b1, _, _ = b.shape +cloop = np.zeros((a0,b0,b1)) + +#loop over non-summed indices -- these exist +#in the tensor product. +for i in range(a0): + for j in range(b0): + for k in range(b1): + #loop over summed indices -- these don't exist + #in the tensor product. + for l in range(a1): + for m in range(a2): + cloop[i,j,k] += a[i,l,m] * b[j,k,m,l] + +assert np.allclose(c, cloop) + +print a.shape, b.shape +print c.shape +``` + + (2, 3, 4) (5, 6, 4, 3) + (2, 5, 6) + diff --git a/docs/09-theano/09.18-tensor-nnet-.md b/docs/09-theano/09.18-tensor-nnet-.md new file mode 100644 index 00000000..334c12bb --- /dev/null +++ b/docs/09-theano/09.18-tensor-nnet-.md @@ -0,0 +1,123 @@ + +# Theano tensor 模块:nnet 子模块 + +`nnet` 是 `tensor` 模块中与神经网络 `Neural Networks` 相关的子模块。 + + +```python +import theano +from theano import tensor as T +``` + + Using gpu device 1: Tesla C2075 (CNMeM is disabled) + + +## Sigmoid 函数 + +共有三种 `sigmoid`: + +- `T.nnet.sigmoid(x)` +- `T.nnet.ultra_fast_sigmoid(x)` +- `T.nnet.hard_sigmoid(x)` + +精度和时间: + +`sigmoid > ultra_fast_sigmoid > hard_sigmoid` + +函数图像: + +![](http://deeplearning.net/software/theano/_images/sigmoid_prec.png) + + +```python +x, y, b = T.dvectors('x', 'y', 'b') +W = T.dmatrix('W') +y = T.nnet.sigmoid(T.dot(W, x) + b) + +print theano.pprint(y) +``` + + sigmoid(((W \dot x) + b)) + + +## 其他 + +`T.nnet.softplus(x)` 返回 + +$$\operatorname{softplus}(x) = \log_e{\left(1 + \exp(x)\right)}$$ + +会解决在 1 附近自定义函数值不准的问题。 + + +```python +x,y,b = T.dvectors('x','y','b') +W = T.dmatrix('W') +y = T.nnet.softplus(T.dot(W,x) + b) + +print theano.pprint(y) +``` + + softplus(((W \dot x) + b)) + + +`T.nnet.softmax(x)` 返回 + +$$ +\operatorname{softmax}_{ij}(x) = \frac{\exp{x_{ij}}}{\sum_k\exp(x_{ik})} +$$ + +当 `softmax` 作用到矩阵时,它会按照行进行计算。 + +不过,下面 +的代码数值上更加稳定: + +``` +e_x = exp(x - x.max(axis=1,
keepdims=True)) +out = e_x / e_x.sum(axis=1, keepdims=True) +``` + + +```python +x,y,b = T.dvectors('x','y','b') +W = T.dmatrix('W') +y = T.nnet.softmax(T.dot(W,x) + b) + +print theano.pprint(y) +``` + + Softmax(((W \dot x) + b)) + + +`T.nnet.relu(x, alpha=0)` 返回这样一个函数: + +$$ +f(x_i) = \left\{ +\begin{aligned} +x_i, & \ x_i > 0 \\ +\alpha x_i, & \ otherwise +\end{aligned}\right. +$$ + +## 损失函数 + +`T.nnet.binary_crossentropy(output, target)` 二类交叉熵: + +$$ +\text{crossentropy}(t,o) = -(t\cdot log(o) + (1 - t) \cdot log(1 - o)) +$$ + + +```python +x, y, b, c = T.dvectors('x', 'y', 'b', 'c') +W = T.dmatrix('W') +V = T.dmatrix('V') +h = T.nnet.sigmoid(T.dot(W, x) + b) +x_recons = T.nnet.sigmoid(T.dot(V, h) + c) +recon_cost = T.nnet.binary_crossentropy(x_recons, x).mean() +``` + +`T.nnet.categorical_crossentropy(coding_dist, true_dist)` 多类交叉熵 + +$$ +H(p,q) = - \sum_x p(x) \log(q(x)) +$$ diff --git a/docs/09-theano/09.19-tensor-conv.md b/docs/09-theano/09.19-tensor-conv.md new file mode 100644 index 00000000..2544f974 --- /dev/null +++ b/docs/09-theano/09.19-tensor-conv.md @@ -0,0 +1,94 @@ + +# Theano tensor 模块:conv 子模块 + +`conv` 是 `tensor` 中处理卷积神经网络的子模块。 + +## 卷积 + +这里只介绍二维卷积: + +`T.nnet.conv2d(input, filters, input_shape=None, filter_shape=None, border_mode='valid', subsample=(1, 1), filter_flip=True, image_shape=None, **kwargs)` + +`conv2d` 函数接受两个输入: + +- `4D` 张量 `input`,其形状如下: + + `[b, ic, i0, i1]` + + +- `4D` 张量 `filter` ,其形状如下: + + `[oc, ic, f0, f1]` + +`border_mode` 控制输出大小: + +- `'valid'`:输出形状: + + `[b, oc, i0 - f0 + 1, i1 - f1 + 1]` + +- `'full'`:输出形状: + + `[b, oc, i0 + f0 - 1, i1 + f1 - 1]` + +## 池化 + +池化操作: + +`T.signal.downsample.max_pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0), mode='max')` + +`input` 池化操作在其最后两维进行。 + +`ds` 是池化区域的大小,用长度为 2 的元组表示。 + +`ignore_border` 设为 `True` 时,`(5, 5)` 在 `(2, 2)` 的池化下会变成 `(2, 2)`(5 % 2 == 1,多余的 1 个被舍去了),否则是 `(3, 3)`。 + +## MNIST 卷积神经网络形状详解 + +```python +def model(X, w, w2, w3, w4, p_drop_conv, p_drop_hidden): +
+ # X: 128 * 1 * 28 * 28 + # w: 32 * 1 * 3 * 3 + # full mode + # l1a: 128 * 32 * (28 + 3 - 1) * (28 + 3 - 1) + l1a = rectify(conv2d(X, w, border_mode='full')) + # l1a: 128 * 32 * 30 * 30 + # ignore_border False + # l1: 128 * 32 * (30 / 2) * (30 / 2) + l1 = max_pool_2d(l1a, (2, 2), ignore_border=False) + l1 = dropout(l1, p_drop_conv) + + # l1: 128 * 32 * 15 * 15 + # w2: 64 * 32 * 3 * 3 + # valid mode + # l2a: 128 * 64 * (15 - 3 + 1) * (15 - 3 + 1) + l2a = rectify(conv2d(l1, w2)) + # l2a: 128 * 64 * 13 * 13 + # l2: 128 * 64 * (13 / 2 + 1) * (13 / 2 + 1) + l2 = max_pool_2d(l2a, (2, 2), ignore_border=False) + l2 = dropout(l2, p_drop_conv) + + # l2: 128 * 64 * 7 * 7 + # w3: 128 * 64 * 3 * 3 + # l3a: 128 * 128 * (7 - 3 + 1) * (7 - 3 + 1) + l3a = rectify(conv2d(l2, w3)) + # l3a: 128 * 128 * 5 * 5 + # l3b: 128 * 128 * (5 / 2 + 1) * (5 / 2 + 1) + l3b = max_pool_2d(l3a, (2, 2), ignore_border=False) + # l3b: 128 * 128 * 3 * 3 + # l3: 128 * (128 * 3 * 3) + l3 = T.flatten(l3b, outdim=2) + l3 = dropout(l3, p_drop_conv) + + # l3: 128 * (128 * 3 * 3) + # w4: (128 * 3 * 3) * 625 + # l4: 128 * 625 + l4 = rectify(T.dot(l3, w4)) + l4 = dropout(l4, p_drop_hidden) + + # l5: 128 * 625 + # w5: 625 * 10 + # pyx: 128 * 10 + pyx = softmax(T.dot(l4, w_o)) + return l1, l2, l3, l4, pyx +``` diff --git a/docs/10-something-interesting/10.01-maps-using-basemap.md b/docs/10-something-interesting/10.01-maps-using-basemap.md new file mode 100644 index 00000000..b30d1598 --- /dev/null +++ b/docs/10-something-interesting/10.01-maps-using-basemap.md @@ -0,0 +1,34 @@ + +# 使用 basemap 画地图 + +# 安装 basemap + +最简单的方式是通过 [conda](http://conda.pydata.org/miniconda.html) 来进行安装: + + conda install basemap + +也可以下载下来自己编译。 + +## 简单使用 + +绘制一幅世界地图: + + +```python +%matplotlib inline + +from mpl_toolkits.basemap import Basemap +import numpy as np +import matplotlib.pyplot as plt +# lon_0 is central longitude of projection. +# resolution = 'c' means use crude resolution coastlines. 
+f = plt.figure(figsize=(16,9)) +m = Basemap(projection='robin',lon_0=0,resolution='c') +m.shadedrelief(scale=0.2) +plt.title("Robinson Projection") +plt.show() +``` + + +![png](output_6_0.png) + diff --git a/docs/10-something-interesting/10.02-maps-using-cartopy.md b/docs/10-something-interesting/10.02-maps-using-cartopy.md new file mode 100644 index 00000000..7ad7e26f --- /dev/null +++ b/docs/10-something-interesting/10.02-maps-using-cartopy.md @@ -0,0 +1,32 @@ + +# 使用 cartopy 画地图 + +## 安装 cartopy + +最简单的方式是通过 [conda](http://conda.pydata.org/miniconda.html) 来进行安装: + + conda install -c scitools cartopy + +也可以下载下来自己编译。 + +## 简单使用 + +绘制一幅世界地图: + + +```python +%matplotlib inline + +import cartopy.crs as ccrs +import matplotlib.pyplot as plt + +f = plt.figure(figsize=(16,9)) +ax = plt.axes(projection=ccrs.Robinson()) +ax.stock_img() + +plt.show() +``` + + +![png](output_6_0.png) + diff --git a/docs/10-something-interesting/10.03-nba-data.md b/docs/10-something-interesting/10.03-nba-data.md new file mode 100644 index 00000000..293e2353 --- /dev/null +++ b/docs/10-something-interesting/10.03-nba-data.md @@ -0,0 +1,1040 @@ + +# 探索 NBA 数据 + +我们首先安装 `Goldsberry` 包,项目源地址: + +https://github.com/bradleyfay/py-Goldsberry + +使用 `pip` 安装: + + pip install py-goldsberry + +该包的接口与 `pandas` 兼容,可以与 `pandas` 的 `DataFrame` 一起使用。 + + +```python +import goldsberry as gb +import pandas as pd +``` + +当前使用的版本号为: + + +```python +gb.__version__ +``` + + + + + '0.8.0.1' + + + +## 球员信息 + +获得 `2015-2016` 赛季运动员的名单: + + +```python +players = gb.PlayerList().players() +players = pd.DataFrame(players) + +players.head() +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DISPLAY_LAST_COMMA_FIRSTFROM_YEARGAMES_PLAYED_FLAGPERSON_IDPLAYERCODEROSTERSTATUSTEAM_ABBREVIATIONTEAM_CITYTEAM_CODETEAM_IDTEAM_NAMETO_YEAR
0Acy, Quincy2012Y203112quincy_acy1SACSacramentokings1610612758Kings2015
1Adams, Jordan2014Y203919jordan_adams1MEMMemphisgrizzlies1610612763Grizzlies2015
2Adams, Steven2013Y203500steven_adams1OKCOklahoma Citythunder1610612760Thunder2015
3Afflalo, Arron2007Y201167arron_afflalo1NYKNew Yorkknicks1610612752Knicks2015
4Ajinca, Alexis2008Y201582alexis_ajinca1NOPNew Orleanspelicans1610612740Pelicans2015
+
+ + + +球员总数为: + + +```python +print len(players) +``` + + 464 + + +通过查询特定的 `TEAM_ABBREVIATION`,我们可以查看某个球队本赛季的球员,比如 `2014-2015` 赛季的总冠军金州勇士 `GSW`: + + +```python +gsw_players = players.ix[players["TEAM_ABBREVIATION"] == "GSW"] + +gsw_players[["DISPLAY_LAST_COMMA_FIRST", "FROM_YEAR", "TEAM_ABBREVIATION", "TEAM_CITY", "TEAM_NAME", "PERSON_ID"]] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DISPLAY_LAST_COMMA_FIRSTFROM_YEARTEAM_ABBREVIATIONTEAM_CITYTEAM_NAMEPERSON_ID
30Barbosa, Leandro2003GSWGolden StateWarriors2571
33Barnes, Harrison2012GSWGolden StateWarriors203084
52Bogut, Andrew2005GSWGolden StateWarriors101106
86Clark, Ian2013GSWGolden StateWarriors203546
103Curry, Stephen2009GSWGolden StateWarriors201939
135Ezeli, Festus2012GSWGolden StateWarriors203105
164Green, Draymond2012GSWGolden StateWarriors203110
209Iguodala, Andre2004GSWGolden StateWarriors2738
262Livingston, Shaun2004GSWGolden StateWarriors2733
263Looney, Kevon2015GSWGolden StateWarriors1626172
279McAdoo, James Michael2014GSWGolden StateWarriors203949
377Rush, Brandon2008GSWGolden StateWarriors201575
398Speights, Marreese2008GSWGolden StateWarriors201578
414Thompson, Jason2008GSWGolden StateWarriors201574
415Thompson, Klay2011GSWGolden StateWarriors202691
+
+ + + +## 球员比赛数据 + +通过 `DISPLAY_LAST_COMMA_FIRST`,我们来查询宣布本赛季之后退役的科比布莱恩特(`Bryant, Kobe`)的信息: + + +```python +kobe = players.ix[players["DISPLAY_LAST_COMMA_FIRST"].str.contains("Kobe")] + +kobe +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DISPLAY_LAST_COMMA_FIRSTFROM_YEARGAMES_PLAYED_FLAGPERSON_IDPLAYERCODEROSTERSTATUSTEAM_ABBREVIATIONTEAM_CITYTEAM_CODETEAM_IDTEAM_NAMETO_YEAR
64Bryant, Kobe1996Y977kobe_bryant1LALLos Angeleslakers1610612747Lakers2015
+
+ + + +为了方便,我们将 `Kobe` 的 `ID` 放到变量中去: + + +```python +kobe_id = 977 +``` + +我们来看本赛季 `Kobe` 的比赛记录: + + +```python +kobe_logs = gb.player.game_logs(kobe_id) + +kobe_logs = pd.DataFrame(kobe_logs.logs()) + +# 最近五场比赛 +kobe_logs.head() +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ASTBLKDREBFG3AFG3MFG3_PCTFGAFGMFG_PCTFTA...PFPLUS_MINUSPTSPlayer_IDREBSEASON_IDSTLTOVVIDEO_AVAILABLEWL
0306730.4291650.3134...2-1917977622015131L
10041440.2862560.2404...0-619977522015001L
24111440.2862890.3213...4-225977222015021L
32091140.36424100.4174...016279771222015211W
45031170.63621100.47612...3638977522015221W
+

5 rows × 27 columns

+
+ + + +截至到全明星赛前,本赛季 `Kobe` 一共参加了 44 场比赛,其场均数据为: + + +```python +kobe_logs.Game_ID +``` + + + + + 0 0021500795 + 1 0021500776 + 2 0021500767 + 3 0021500747 + 4 0021500734 + 5 0021500720 + 6 0021500697 + 7 0021500662 + 8 0021500653 + 9 0021500638 + 10 0021500614 + 11 0021500608 + 12 0021500592 + 13 0021500576 + 14 0021500549 + 15 0021500539 + 16 0021500476 + 17 0021500458 + 18 0021500455 + 19 0021500440 + 20 0021500435 + 21 0021500422 + 22 0021500385 + 23 0021500370 + 24 0021500349 + 25 0021500342 + 26 0021500325 + 27 0021500308 + 28 0021500301 + 29 0021500286 + 30 0021500269 + 31 0021500263 + 32 0021500253 + 33 0021500244 + 34 0021500214 + 35 0021500201 + 36 0021500188 + 37 0021500151 + 38 0021500135 + 39 0021500095 + 40 0021500077 + 41 0021500059 + 42 0021500045 + 43 0021500031 + 44 0021500017 + Name: Game_ID, dtype: object + + + + +```python +def show_avg_info(avg): + print "得分:{:.1f}".format(avg.ix["PTS"]) + print "篮板:{:.1f}".format(avg.ix["REB"]) + print "助攻:{:.1f}".format(avg.ix["AST"]) + print "盖帽:{:.1f}".format(avg.ix["BLK"]) + print "时间:{:.1f}".format(avg.ix["MIN"]) + print "抢断:{:.1f}".format(avg.ix["STL"]) + print "失误:{:.1f}".format(avg.ix["TOV"]) + print "犯规:{:.1f}".format(avg.ix["PF"]) + print "投篮:{:.1f}%".format(avg.ix["FGM"] * 100 / avg.ix["FGA"]) + print "三分:{:.1f}%".format(avg.ix["FG3M"] * 100 / avg.ix["FG3A"]) + print "罚篮:{:.1f}%".format(avg.ix["FTM"] * 100 / avg.ix["FTA"]) + print "后篮板:{:.1f}".format(avg.ix["DREB"]) + print "前篮板:{:.1f}".format(avg.ix["OREB"]) + print "正负值:{:.1f}".format(avg.ix["PLUS_MINUS"]) + +show_avg_info(kobe_logs.mean()) +``` + + 得分:16.9 + 篮板:4.2 + 助攻:3.4 + 盖帽:0.2 + 时间:29.3 + 抢断:1.0 + 失误:2.2 + 犯规:1.9 + 投篮:34.9% + 三分:28.0% + 罚篮:80.3% + 后篮板:3.5 + 前篮板:0.7 + 正负值:-7.9 + + +再看一下史提芬库里的场均数据(不要问我为什么跪着看球): + + +```python +curry_id = 201939 +curry_logs = gb.player.game_logs(curry_id) +curry_logs = pd.DataFrame(curry_logs.logs()) + +show_avg_info(curry_logs.mean()) +``` + + 得分:29.8 + 篮板:5.3 + 助攻:6.6 + 盖帽:0.2 + 时间:33.9 + 抢断:2.1 + 失误:3.3 + 
犯规:2.0 + 投篮:50.8% + 三分:45.4% + 罚篮:91.2% + 后篮板:4.5 + 前篮板:0.9 + 正负值:15.5 + + +当然我们也可以对比一下职业生涯的数据: + + +```python +kobe_career = gb.player.career_stats(kobe_id) +curry_career = gb.player.career_stats(curry_id) +``` + +职业生涯最高: + + +```python +def show_career_high(career): + career_high = pd.DataFrame(career.career_high()).ix[[0,1,5]] + print career_high[["GAME_DATE", "STAT", "STAT_VALUE", "VS_TEAM_CITY", "VS_TEAM_NAME"]] + +print "Kobe" +show_career_high(kobe_career) + +print "Curry" +show_career_high(curry_career) +``` + + Kobe + GAME_DATE STAT STAT_VALUE VS_TEAM_CITY VS_TEAM_NAME + 0 JAN 22 2006 PTS 81 Toronto Raptors + 1 JAN 24 2010 REB 16 Toronto Raptors + 5 JAN 15 2015 AST 17 Cleveland Cavaliers + Curry + GAME_DATE STAT STAT_VALUE VS_TEAM_CITY VS_TEAM_NAME + 0 FEB 27 2013 PTS 54 New York Knicks + 1 DEC 28 2015 REB 14 Sacramento Kings + 5 DEC 27 2013 AST 16 Phoenix Suns + + +本赛季最高: + + +```python +def show_season_high(career): + career_high = pd.DataFrame(career.season_high()).ix[[0,1,5]] + print career_high[["GAME_DATE", "STAT", "STAT_VALUE", "VS_TEAM_CITY", "VS_TEAM_NAME"]] + +print "Kobe" +show_season_high(kobe_career) + +print "Curry" +show_season_high(curry_career) +``` + + Kobe + GAME_DATE STAT STAT_VALUE VS_TEAM_CITY VS_TEAM_NAME + 0 FEB 02 2016 PTS 38 Minnesota Timberwolves + 1 FEB 04 2016 REB 12 New Orleans Pelicans + 5 NOV 15 2015 AST 9 Detroit Pistons + Curry + GAME_DATE STAT STAT_VALUE VS_TEAM_CITY VS_TEAM_NAME + 0 OCT 31 2015 PTS 53 New Orleans Pelicans + 1 DEC 28 2015 REB 14 Sacramento Kings + 5 JAN 25 2016 STL 5 San Antonio Spurs + + +## 比赛信息 + + +```python +game_ids = gb.GameIDs() +game_ids = pd.DataFrame(game_ids.game_list()) + +game_ids.head() +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ASTBLKDREBFG3AFG3MFG3_PCTFGAFGMFG_PCTFTA...PTSREBSEASON_IDSTLTEAM_ABBREVIATIONTEAM_IDTEAM_NAMETOVVIDEO_AVAILABLEWL
0284452980.276124560.45246...14764220157DET1610612765Detroit Pistons111W
1302362390.39187530.60934...14246220159SAC1610612758Sacramento Kings151W
2342302190.42986520.60513...123382201510SAS1610612759San Antonio Spurs131W
32963635160.45795520.54715...131462201510GSW1610612744Golden State Warriors151W
4348383180.258104520.50016...122462201510SAC1610612758Sacramento Kings201L
+

5 rows × 29 columns

+
+ + + +## 获得运动员的头像 + + +```python +from IPython.display import Image + +Image("http://stats.nba.com/media/players/230x185/"+str(kobe_id)+".png") +``` + + + + +![png](output_33_0.png) + + + + +```python +Image("http://stats.nba.com/media/players/230x185/"+str(curry_id)+".png") +``` + + + + +![png](output_34_0.png) + + + +## More + +修改了 `goldsberry\player\_Player.py` 代码中的错误,使之能够查询退役球员的信息,修改后的代码在本文件夹下,放到安装目录之后下面的代码均可以运行: + + +```python +from goldsberry.player import _Player as pl_old +``` + +1997 年的球员列表: + + +```python +players_1997 = pl_old.PlayerList(1997) + +players_1997 = pd.DataFrame(players_1997) +``` + +乔丹的球员 ID: + + +```python +jordan_id = players_1997["PERSON_ID"].ix[players_1997["DISPLAY_LAST_COMMA_FIRST"].str.contains("Jordan, Michael")] +jordan_id = jordan_id[jordan_id.index[0]] +jordan_id +``` + + + + + 893 + + + +乔丹在 1997-1998 赛季常规赛表现: + + +```python +jordan_logs_1997 = pl_old.game_logs(jordan_id, season="1997") +jordan_logs_1997 = pd.DataFrame(jordan_logs_1997.logs()) + +show_avg_info(jordan_logs_1997.mean()) +``` + + 得分:28.7 + 篮板:5.8 + 助攻:3.5 + 盖帽:0.5 + 时间:38.9 + 抢断:1.7 + 失误:2.3 + 犯规:1.8 + 投篮:46.5% + 三分:23.8% + 罚篮:78.4% + 后篮板:4.2 + 前篮板:1.6 + 正负值:7.3 + + +乔丹在 1997-1998 赛季季后赛表现: + + +```python +jordan_logs_1997 = pl_old.game_logs(jordan_id, season="1997", seasontype=2) +jordan_logs_1997 = pd.DataFrame(jordan_logs_1997.logs()) + +show_avg_info(jordan_logs_1997.mean()) +``` + + 得分:32.4 + 篮板:5.1 + 助攻:3.5 + 盖帽:0.6 + 时间:41.0 + 抢断:1.5 + 失误:2.1 + 犯规:2.2 + 投篮:46.2% + 三分:30.2% + 罚篮:81.2% + 后篮板:3.5 + 前篮板:1.6 + 正负值:7.5 + + +头像: + + +```python +Image("http://stats.nba.com/media/players/230x185/"+str(jordan_id)+".png") +``` + + + + +![png](output_47_0.png) + + diff --git a/docs/10-something-interesting/10.04-louis-cha's-kungfu-world.md b/docs/10-something-interesting/10.04-louis-cha's-kungfu-world.md new file mode 100644 index 00000000..677feec3 --- /dev/null +++ b/docs/10-something-interesting/10.04-louis-cha's-kungfu-world.md @@ -0,0 +1,791 @@ + +# 金庸的武侠世界 + 
+金庸老爷子一共写了15部武侠小说,它们分别是: + +- 《飞狐外传》(1960年) +- 《雪山飞狐》(1959年) +- 《连城诀》(1963年) +- 《天龙八部》(1963年) +- 《射雕英雄传》(1957年) +- 《白马啸西风》(1961年) +- 《鹿鼎记》(1969年) +- 《笑傲江湖》(1967年) +- 《书剑恩仇录》(1955年) +- 《神雕侠侣》(1959年) +- 《侠客行》(1965年) +- 《倚天屠龙记》(1961年) +- 《碧血剑》(1956年) +- 《鸳鸯刀》(1961年) +- 《越女剑》(1970年) + +我们现在就用 `Python` 来探索一下金庸小说中的武侠世界吧。 + +## 准备工作 + +再处理小说之前,我们需要先做一些准备工作。 + +因为涉及中文字符,所以我们使用 `__future__` 中 Python 3 的特性,将所有的字符串转为 `unicode`。 + + +```python +from __future__ import unicode_literals +``` + +再来我们解决图像里中文字符显示的问题,Matplotlib虽然支持 `unicode` 编码,但是直接输出中文字体会出现问题。 + + +```python +import numpy as np +import scipy as sp +import matplotlib.pyplot as plt +%matplotlib inline + +x = range(10) +plt.plot(x) +plt.title("中文") +plt.show() +``` + + +![png](output_6_0.png) + + +出现上图的原因是它找不到合适的中文字体去显示中文,为此,我们可以去寻找一些支持中文的字体来进行设置。 + +`Windows 7` 及以上的系统中,字体位置为 `C:/Windows/Fonts`,例如: +- 宋体:`C:/Windows/Fonts/simsun.ttc` + +`Linux` 系统可以通过 `fc-list` 命令查看已有的字体和相应的位置,例如: +- `/usr/share/fonts/truetype/osx-font-family/Songti.ttc: Songti TC,宋體\-繁,宋体\-繁:style=Bold,粗體,粗体` +- `/usr/share/fonts/truetype/osx-font-family/Devanagari Sangam MN.ttc: Devanagari Sangam MN,देवनागरी संगम एम॰एन॰:style=Bold,粗體,Fed,Fett,Puolilihava,Gras,Grassetto,ボールド,볼드체,Vet,Fet,Negrito,Жирный,बोल्ड,粗体,Negrita` +- `/usr/share/fonts/truetype/osx-font-family/Iowan Old Style.ttc: Iowan Old Style,Iowan Old Style Black:style=Black Italic,Italic` + +也可以从网上直接下载字体 +- 比如 `Yahei Consolas` 的字体 `YaHei.Consolas.1.11b.ttf`。 + +找到了字体的位置,我们可以使用 `matplotlib.font_manager` 中的 `FontProperties` 导入字体: + + font_xxx = FontProperties(fname="/usr/share/fonts/truetype/osx-font-family/Songti.ttc") + font_xxx = FontProperties(fname="C://Windows//Fonts//simsun.ttc") + +为了方便,我们不使用字体的绝对路径导入,而是将需要的字体放在程序对应的文件夹下: + +- `simsum.ttc` +- `YaHei.Consolas.1.11b.ttf` + + +```python +from matplotlib.font_manager import FontProperties + +font_simsum = FontProperties(fname="simsun.ttc") +font_yahei_consolas = FontProperties(fname="YaHei.Consolas.1.11b.ttf") +``` + +在绘图的时候进行设置: + + 
+```python +x = range(10) +plt.plot(x) +plt.title("中文", fontproperties=font_yahei_consolas, fontsize=14) +plt.show() +``` + + +![png](output_10_0.png) + + +## 小说概览 + +我们从网上找到金庸小说的 txt 全文,放在 `novels` 文件夹中: + + +```python +!ls novels +``` + + 书剑恩仇录.txt 天龙八部.txt 碧血剑.txt 越女剑.txt 飞狐外传.txt + 侠客行.txt 射雕英雄传.txt 神雕侠侣.txt 连城诀.txt 鸳鸯刀.txt + 倚天屠龙记.txt 白马啸西风.txt 笑傲江湖.txt 雪山飞狐.txt 鹿鼎记.txt + + +先一睹为快: + + +```python +with open('novels/鸳鸯刀.txt') as f: + # 去掉结尾的换行符 + data = [line.strip().decode('utf8') for line in f.readlines()] + +for line in data[:13]: + if line.strip(): + print line +``` + +  +   鸳鸯刀 +   四个劲装结束的汉子并肩而立,拦在当路! +   若是黑道上山寨的强人,不会只有四个,莫非在这黑沉沉的松林之中,暗中还埋伏下大批人手?如是剪径的小贼,见了这么声势浩大的镖队,远避之唯恐不及,哪敢这般大模大样的拦路挡道?难道竟是武林高手,冲着自己而来? +   凝神打量四人:最左一人短小精悍,下巴尖削,手中拿着一对峨嵋钢刺。第二个又高又肥,便如是一座铁塔摆在地下,身前放着一块大石碑,碑上写的是“先考黄府君诚本之墓”,这自是一块墓碑了,不知放在身前有何用意?黄诚本?没听说江湖上有这么一位前辈高手啊!第三个中等身材,白净脸皮,若不是一副牙齿向外凸出了一寸,一个鼻头低陷了半寸,倒算得上是一位相貌英俊的人物,他手中拿的是一对流星锤。最右边的是个病夫模样的中年人,衣衫褴褛,咬着一根旱烟管,双目似睁似闭,嘴里慢慢喷着烟雾,竟是没将这一队七十来人的镖队瞧在眼里。 +   那三人倒还罢了,这病夫定是个内功深湛的劲敌。顷刻之间,江湖上许多轶闻往事涌上了心头:一个白发婆婆空手杀死了五名镖头,劫走了一支大镖;一个老乞丐大闹太原府公堂,割去了知府的首级,倏然间不知去向;一个美貌大姑娘打倒了晋北大同府享名二十余年的张大拳师……越是貌不惊人、满不在乎的人物,越是武功了得,江湖上有言道:“真人不露相,露相不真人。” +   瞧着这个闭目抽烟的病夫,陕西西安府威信镖局的总镖头、“铁鞭镇八方”周威信不由得深自踌躇起来,不由自主的伸手去摸了一摸背上的包袱。 + + +## 出场人物 + +接着,我们先找到金庸小说中所有出场的人物,放到 names.txt 文件中,其格式为: + +``` +小说1 +人物1 人物2 …… +小说2 +人物1 人物2 …… +小说3 +人物1 人物2 …… +…… +``` + + +```python +with open('names.txt') as f: + # 去掉结尾的换行符 + data = [line.strip().decode('utf8') for line in f.readlines()] + +novels = data[::2] +names = data[1::2] + +novel_names = {k: v.split() for k, v in zip(novels, names)} + +for name in novel_names['天龙八部'][:20]: + print name +``` + + 刀白凤 + 丁春秋 + 马夫人 + 马五德 + 小翠 + 于光豪 + 巴天石 + 不平道人 + 邓百川 + 风波恶 + 甘宝宝 + 公冶乾 + 木婉清 + 包不同 + 天狼子 + 太皇太后 + 王语嫣 + 乌老大 + 无崖子 + 云岛主 + + +## 寻找主角光环 + +我们来看看人物在小说中的出场次数统计。 + +显然出场次数越多,自然主角光环越强,我们定义一个函数寻找小说中主角光环最强的几个人: + + +```python +def find_main_charecters(novel, num=10): + with open('novels/{}.txt'.format(novel)) as f: + data = f.read().decode('utf8') + count = [] + for 
name in novel_names[novel]: + count.append([name, data.count(name)]) + count.sort(key=lambda x: x[1]) + _, ax = plt.subplots() + + numbers = [x[1] for x in count[-num:]] + names = [x[0] for x in count[-num:]] + ax.barh(range(num), numbers, color='red', align='center') + ax.set_title(novel, fontsize=14, fontproperties=font_yahei_consolas) + ax.set_yticks(range(num)) + ax.set_yticklabels(names, fontsize=14, fontproperties=font_yahei_consolas) +``` + +天龙八部: + + +```python +find_main_charecters("天龙八部") +``` + + +![png](output_23_0.png) + + +显然,就《天龙八部》来说,萧(乔)峰,段誉,虚竹这三兄弟的主角光环最强。 + +再看射雕三部曲: + + +```python +find_main_charecters("射雕英雄传") +find_main_charecters("神雕侠侣") +find_main_charecters("倚天屠龙记") +``` + + +![png](output_25_0.png) + + + +![png](output_25_1.png) + + + +![png](output_25_2.png) + + +## Word2Vec + +接下来,我们将使用一些机器学习的观点来处理这些小说。 + +`Word2Vec` 是一款将词表征为实数值向量的高效工具,原理就不过多介绍了,感兴趣的可以自行搜索, + +`gensim` 包提供了一个 `Python` 版的实现。 + +- 源代码地址:https://github.com/RaRe-Technologies/gensim +- 官方文档地址:http://radimrehurek.com/gensim/ + +首先安装 `gensim`: + + pip install gensim + +安装完成之后,导入这个包: + + +```python +import gensim +``` + + Using gpu device 1: Tesla C2075 (CNMeM is disabled, CuDNN not available) + + +## 中文分词 + +虽然我们安装了 `gensim`,但我们还不可以直接使用它来进行 `Word2Vec` 的操作,因为 `Word2Vec` 中的词默认是用空格分隔的,而中文小说显然不符合这个要求,为此,我们需要对中文进行分词。 + +一个比较好用的 `Python` 中文分词包叫做 `jieba` (结巴)。 + +- 源代码地址:https://github.com/fxsjy/jieba + +安装 jieba: + + pip install jieba + +导入: + + +```python +import jieba +``` + +`jieba` 包具有识别新词的能力,不过为了得到更准确的分词结果,我们可以将人名导入 `jieba` 库的字典,除此之外,我们还加入门派和武功的专有名词: + + +```python +for _, names in novel_names.iteritems(): + for name in names: + jieba.add_word(name) + +with open("kungfu.txt") as f: + kungfu_names = [line.decode('utf8').strip() for line in f.readlines()] + +with open("bangs.txt") as f: + bang_names = [line.decode('utf8').strip() for line in f.readlines()] + +for name in kungfu_names: + jieba.add_word(name) + +for name in bang_names: + jieba.add_word(name) +``` + + Building prefix 
dict from the default dictionary ... + DEBUG:jieba:Building prefix dict from the default dictionary ... + Loading model from cache /tmp/jieba.cache + DEBUG:jieba:Loading model from cache /tmp/jieba.cache + Loading model cost 0.370 seconds. + DEBUG:jieba:Loading model cost 0.370 seconds. + Prefix dict has been built succesfully. + DEBUG:jieba:Prefix dict has been built succesfully. + + +我们按照段落处理文本: + + +```python +novels = ["书剑恩仇录", + "天龙八部", + "碧血剑", + "越女剑", + "飞狐外传", + "侠客行", + "射雕英雄传", + "神雕侠侣", + "连城诀", + "鸳鸯刀", + "倚天屠龙记", + "白马啸西风", + "笑傲江湖", + "雪山飞狐", + "鹿鼎记"] + +sentences = [] + +for novel in novels: + print "处理:{}".format(novel) + with open('novels/{}.txt'.format(novel)) as f: + data = [line.decode('utf8').strip() for line in f.readlines() if line.decode('utf8').strip()] + for line in data: + words = list(jieba.cut(line)) + sentences.append(words) +``` + + 处理:书剑恩仇录 + 处理:天龙八部 + 处理:碧血剑 + 处理:越女剑 + 处理:飞狐外传 + 处理:侠客行 + 处理:射雕英雄传 + 处理:神雕侠侣 + 处理:连城诀 + 处理:鸳鸯刀 + 处理:倚天屠龙记 + 处理:白马啸西风 + 处理:笑傲江湖 + 处理:雪山飞狐 + 处理:鹿鼎记 + + +## 训练模型 + +默认参数进行训练: + + +```python +model = gensim.models.Word2Vec(sentences, size=100, window=5, min_count=5, workers=4) +``` + +我们可以把训练好的模型存到本地: + + +```python +model.save("louis_cha.model") +``` + +从本地磁盘上读取模型: + + +```python +model = gensim.models.Word2Vec.load("louis_cha.model") +``` + +## 相似度结果 + +有了模型,我们可以进行一些简单而有趣的测试。 + +首先看与乔峰相似的人: + + +```python +for k, s in model.most_similar(positive=["乔峰", "萧峰"]): + print k, s +``` + + 段正淳 0.839533925056 + 慕容复 0.800726354122 + 虚竹 0.796926677227 + 童姥 0.791711509228 + 谢烟客 0.787050366402 + 游坦之 0.786818385124 + 余鱼同 0.780444204807 + 袁承志 0.779631733894 + 钟万仇 0.759801149368 + 贝海石 0.756160736084 + + +出来的都是出场较多的男性人物。 + +再看看与阿朱相似的人: + + +```python +for k, s in model.most_similar(positive=["阿朱"]): + print k, s +``` + + 钟灵 0.789930582047 + 阿紫 0.77720785141 + 方怡 0.774438858032 + 钟夫人 0.767169654369 + 香香公主 0.763835728168 + 王语嫣 0.761606991291 + 青青 0.761157155037 + 仪琳 0.75483584404 + 木婉清 0.751208424568 + 段誉 0.745343744755 + + 
+这回出现的是一群女性角色。 + +除了人物,我们可以看看门派: + + +```python +for k, s in model.most_similar(positive=["丐帮"]): + print k, s +``` + + 长乐帮 0.807791054249 + 雪山派 0.793763160706 + 峨嵋派 0.792181968689 + 天地会 0.789434850216 + 门人 0.785883545876 + 红花会 0.78480899334 + 恒山派 0.779587745667 + 嵩山派 0.77581256628 + 全真教 0.763592064381 + 魔教 0.746910750866 + + +这个出来的结果就是一堆门派。 + +还可以看看武功秘籍: + + +```python +for k, s in model.most_similar(positive=["降龙十八掌"]): + print k, s +``` + + 打狗棒法 0.89123404026 + 空明拳 0.890258312225 + 太极拳 0.884406626225 + 一阳指 0.874251723289 + 心法 0.874069094658 + 八卦掌 0.864349603653 + 绝招 0.864094853401 + 乾坤大挪移 0.858512759209 + 六合拳 0.852675139904 + 拳法 0.848574995995 + + +在 Word2Vec 的模型里,有过“中国-北京=法国-巴黎”的例子,这里我们也可以找到这样的例子: + + +```python +def find_relationship(a, b, c): + """ + 返回 d + a与b的关系,跟c与d的关系一样 + """ + d, _ = model.most_similar(positive=[c, b], negative=[a])[0] + print "给定“{}”与“{}”,“{}”和“{}”有类似的关系".format(a, b, c, d) + +find_relationship("段誉", "段公子", "乔峰") +``` + + 给定“段誉”与“段公子”,“乔峰”和“乔帮主”有类似的关系 + + +类似的: + + +```python +# 情侣对 +find_relationship("郭靖", "黄蓉", "杨过") + +# 岳父女婿 +find_relationship("令狐冲", "任我行", "郭靖") + +# 非情侣 +find_relationship("郭靖", "华筝", "杨过") +``` + + 给定“郭靖”与“黄蓉”,“杨过”和“小龙女”有类似的关系 + 给定“令狐冲”与“任我行”,“郭靖”和“黄药师”有类似的关系 + 给定“郭靖”与“华筝”,“杨过”和“绿萼”有类似的关系 + + +以及,小宝你是有多爱男人: + + +```python +# 韦小宝 +find_relationship("杨过", "小龙女", "韦小宝") +find_relationship("令狐冲", "盈盈", "韦小宝") +find_relationship("张无忌", "赵敏", "韦小宝") +``` + + 给定“杨过”与“小龙女”,“韦小宝”和“康熙”有类似的关系 + 给定“令狐冲”与“盈盈”,“韦小宝”和“康熙”有类似的关系 + 给定“张无忌”与“赵敏”,“韦小宝”和“康熙”有类似的关系 + + +除了人物之间的关系,门派武功之间的关系: + + +```python +find_relationship("郭靖", "降龙十八掌", "黄蓉") +find_relationship("武当", "张三丰", "少林") +find_relationship("任我行", "魔教", "令狐冲") +``` + + 给定“郭靖”与“降龙十八掌”,“黄蓉”和“打狗棒法”有类似的关系 + 给定“武当”与“张三丰”,“少林”和“灭绝师太”有类似的关系 + 给定“任我行”与“魔教”,“令狐冲”和“恒山派”有类似的关系 + + +## 人物聚类分析 + +之前我们对文本进行 `Word2Vec` 的结果,是将一个中文词组,映射到了一个向量空间,因此,我们可以利用这个向量表示的空间,对这些词进行聚类分析。 + +因为全部小说中的人物太多,我们考虑从单本小说进行入手: + + +```python +all_names = [] +word_vectors = None + +for name in novel_names["天龙八部"]: 
+ if name in model: + all_names.append(name) + if word_vectors is None: + word_vectors = model[name] + else: + word_vectors = np.vstack((word_vectors, model[name])) + +all_names = np.array(all_names) +``` + +### KMeans + +聚类我们可以使用很多方法,这里我们用 scikit-learn 自带的几种聚类方法进行聚类。 + +先考虑 Kmeans: + + +```python +from sklearn.cluster import KMeans +``` + +如果只分成3类,那么很明显地可以将众人分成主角,配角,跑龙套的三类: + + +```python +N = 3 +label = KMeans(N).fit(word_vectors).labels_ + +for c in range(N): + print "\n类别{}:".format(c+1) + for idx, name in enumerate(all_names[label==c]): + print name, + if idx % 10 == 9: + print + print +``` + + + 类别1: + 刀白凤 丁春秋 马夫人 巴天石 邓百川 风波恶 公冶乾 包不同 乌老大 云中鹤 + 白世镜 本因 过彦之 司马林 玄慈 玄寂 玄难 叶二娘 左子穆 李秋水 + 全冠清 阮星竹 朱丹臣 阿碧 波罗星 鸠摩智 耶律洪基 苏星河 段延庆 范骅 + 赵钱孙 哲罗星 钟万仇 秦红棉 徐长老 崔百泉 萧远山 褚万里 慕容博 谭婆 + 薛慕华 + + 类别2: + 马五德 小翠 不平道人 甘宝宝 天狼子 太皇太后 无崖子 止清 天山童姥 本参 + 本观 本相 出尘子 冯阿三 古笃诚 兰剑 平婆婆 石嫂 司空玄 玄苦 + 玄生 玄痛 耶律莫哥 李春来 李傀儡 刘竹庄 朴者和尚 许卓诚 竹剑 阿洪 + 阿胜 陈孤雁 来福儿 努儿海 宋长老 苏辙 吴长风 辛双清 严妈妈 余婆婆 + 岳老三 张全祥 单伯山 单季山 单小山 单正 段正明 宗赞王子 苟读 华赫艮 + 郁光标 卓不凡 范百龄 哈大霸 吴光胜 梦姑 神山上人 神音 室里 姚伯当 + 幽草 龚光杰 贾老者 康广陵 容子矩 桑土公 唐光雄 奚长老 诸保昆 崔绿华 + 符敏仪 菊剑 梅剑 游骥 游驹 傅思归 葛光佩 缘根 鲍千灵 智光大师 + 瑞婆婆 端木元 黎夫人 谭公 赫连铁树 谭青 摘星子 慧方 慧观 慧净 + 慧真 穆贵妃 吴领军 易大彪 + + 类别3: + 木婉清 王语嫣 乔峰 萧峰 阿朱 阿紫 段誉 段正淳 钟灵 虚竹 + 游坦之 慕容复 + + +我们把众龙套去掉: + + +```python +N = 4 +c = sp.stats.mode(label).mode +remain_names = all_names[label!=c] +remain_vectors = word_vectors[label!=c] + +remain_label = KMeans(N).fit(remain_vectors).labels_ + +for c in range(N): + print "\n类别{}:".format(c+1) + for idx, name in enumerate(remain_names[remain_label==c]): + print name, + if idx % 10 == 9: + print + print +``` + + + 类别1: + 刀白凤 马夫人 风波恶 包不同 乌老大 白世镜 司马林 叶二娘 左子穆 李秋水 + 阮星竹 阿碧 苏星河 赵钱孙 钟万仇 秦红棉 崔百泉 萧远山 慕容博 谭婆 + 薛慕华 + + 类别2: + 木婉清 王语嫣 阿朱 阿紫 段誉 钟灵 虚竹 + + 类别3: + 丁春秋 云中鹤 乔峰 萧峰 鸠摩智 段延庆 段正淳 游坦之 慕容复 + + 类别4: + 巴天石 邓百川 公冶乾 本因 过彦之 玄慈 玄寂 玄难 全冠清 朱丹臣 + 波罗星 耶律洪基 范骅 哲罗星 徐长老 褚万里 + + +可以看到,段家的儿子和女儿倾向于聚在一起,而反派角色则倾向于被聚在一起。 + +### 层级聚类 + +换一本小说: + + +```python +all_names = [] +word_vectors = None + +for name in novel_names["倚天屠龙记"]: + if 
name in model: + all_names.append(name) + if word_vectors is None: + word_vectors = model[name] + else: + word_vectors = np.vstack((word_vectors, model[name])) + +all_names = np.array(all_names) +``` + +这次采用层级聚类的方式,调用的是 `Scipy` 中层级聚类的包: + + +```python +import scipy.cluster.hierarchy as sch +``` + + +```python +Y = sch.linkage(word_vectors, method="ward") + +_, ax = plt.subplots(figsize=(10, 40)) + +Z = sch.dendrogram(Y, orientation='right') +idx = Z['leaves'] + +ax.set_xticks([]) +ax.set_yticklabels(all_names[idx], fontproperties=font_yahei_consolas) +ax.set_frame_on(False) + +plt.show() +``` + + +![png](output_76_0.png) + + +来看红色聚类区的上半部分: + +与张教主相关的人物:两个女人,赵敏和周芷若;父母和义父。 + +而红色聚类区的下半部分主要是明教与武当中与张无忌相关的部分。 + +### 门派和武功 + +除了人物,我们还可以考虑对武功进行聚类分析: + + +```python +all_names = [] +word_vectors = None +for name in kungfu_names: + if name in model: + all_names.append(name) + if word_vectors is None: + word_vectors = model[name] + else: + word_vectors = np.vstack((word_vectors, model[name])) + +all_names = np.array(all_names) + +Y = sch.linkage(word_vectors, method="ward") + +_, ax = plt.subplots(figsize=(10, 35)) + +Z = sch.dendrogram(Y, orientation='right') +idx = Z['leaves'] + +ax.set_xticks([]) +ax.set_yticklabels(all_names[idx], fontproperties=font_yahei_consolas) +ax.set_frame_on(False) + +plt.show() +``` + + +![png](output_80_0.png) + + +可以认为,一部分是主角比较厉害的武功,比较多的那部分只能算是配角们的武功。 + +再来就是门派的聚类: + + +```python +all_names = [] +word_vectors = None +for name in bang_names: + if name in model: + all_names.append(name) + if word_vectors is None: + word_vectors = model[name] + else: + word_vectors = np.vstack((word_vectors, model[name])) + +all_names = np.array(all_names) + +Y = sch.linkage(word_vectors, method="ward") + +_, ax = plt.subplots(figsize=(10, 25)) + +Z = sch.dendrogram(Y, orientation='right') +idx = Z['leaves'] + +ax.set_xticks([]) +ax.set_yticklabels(all_names[idx], fontproperties=font_yahei_consolas) +ax.set_frame_on(False) + +plt.show() +``` + + 
+![png](output_83_0.png) + + +比较少的这一类,基本都是在某几部小说中出现的主要门派,而大多数门派都是打酱油的。 + +## 总结 + +> 飞雪连天射白鹿,笑书神侠倚碧鸳。 + +

diff --git a/docs/11-useful-tools/11.01-pprint.md b/docs/11-useful-tools/11.01-pprint.md new file mode 100644 index 00000000..c88780d4 --- /dev/null +++ b/docs/11-useful-tools/11.01-pprint.md @@ -0,0 +1,46 @@ + +# pprint 模块:打印 Python 对象 + +`pprint` 是 pretty printer 的缩写,用来打印 Python 数据结构,与 `print` 相比,它打印出来的结构更加整齐,便于阅读。 + + +```python +import pprint +``` + +生成一个 Python 对象: + + +```python +data = ( + "this is a string", + [1, 2, 3, 4], + ("more tuples", 1.0, 2.3, 4.5), + "this is yet another string" + ) +``` + +使用普通的 `print` 函数: + + +```python +print data +``` + + ('this is a string', [1, 2, 3, 4], ('more tuples', 1.0, 2.3, 4.5), 'this is yet another string') + + +使用 `pprint` 模块中的 `pprint` 函数: + + +```python +pprint.pprint(data) +``` + + ('this is a string', + [1, 2, 3, 4], + ('more tuples', 1.0, 2.3, 4.5), + 'this is yet another string') + + +可以看到,这样打印出来的结构更加美观。 diff --git a/docs/11-useful-tools/11.02-pickle-and-cPickle.md b/docs/11-useful-tools/11.02-pickle-and-cPickle.md new file mode 100644 index 00000000..07efeded Binary files /dev/null and b/docs/11-useful-tools/11.02-pickle-and-cPickle.md differ diff --git a/docs/11-useful-tools/11.03-json.md b/docs/11-useful-tools/11.03-json.md new file mode 100644 index 00000000..b3e423f0 --- /dev/null +++ b/docs/11-useful-tools/11.03-json.md @@ -0,0 +1,181 @@ + +# json 模块:处理 JSON 数据 + +[JSON (JavaScript Object Notation)](http://json.org) 是一种轻量级的数据交换格式,易于人阅读和编写,同时也易于机器解析和生成。 + +## JSON 基础 + +`JSON` 的基础结构有两种:键值对 (`name/value pairs`) 和数组 (`array`)。 + +`JSON` 具有以下形式: + +- `object` - 对象,用花括号表示,形式为(数据是无序的): + - `{ pair_1, pair_2, ..., pair_n }` +- `pair` - 键值对,形式为: + - `string : value` +- `array` - 数组,用中括号表示,形式为(数据是有序的): + - `[value_1, value_2, ..., value_n ]` +- `value` - 值,可以是 + - `string` 字符串 + - `number` 数字 + - `object` 对象 + - `array` 数组 + - `true / false / null` 特殊值 +- `string` 字符串 + +例子: + +```json +{ + "name": "echo", + "age": 24, + "coding skills": ["python", "matlab", "java", "c", "c++", "ruby", "scala"], + "ages for
school": { + "primary school": 6, + "middle school": 9, + "high school": 15, + "university": 18 + }, + "hobby": ["sports", "reading"], + "married": false +} +``` + +## JSON 与 Python 的转换 + +假设我们已经将上面这个 `JSON` 对象写入了一个字符串: + + +```python +import json +from pprint import pprint + +info_string = """ +{ + "name": "echo", + "age": 24, + "coding skills": ["python", "matlab", "java", "c", "c++", "ruby", "scala"], + "ages for school": { + "primary school": 6, + "middle school": 9, + "high school": 15, + "university": 18 + }, + "hobby": ["sports", "reading"], + "married": false +} +""" +``` + +我们可以用 `json.loads()` (load string) 方法从字符串中读取 `JSON` 数据: + + +```python +info = json.loads(info_string) + +pprint(info) +``` + + {u'age': 24, + u'ages for school': {u'high school': 15, + u'middle school': 9, + u'primary school': 6, + u'university': 18}, + u'coding skills': [u'python', + u'matlab', + u'java', + u'c', + u'c++', + u'ruby', + u'scala'], + u'hobby': [u'sports', u'reading'], + u'married': False, + u'name': u'echo'} + + +此时,我们将原来的 `JSON` 数据变成了一个 `Python` 对象,在我们的例子中这个对象是个字典(也可能是别的类型,比如列表): + + +```python +type(info) +``` + + + + + dict + + + +可以使用 `json.dumps()` 将一个 `Python` 对象变成 `JSON` 对象: + + +```python +info_json = json.dumps(info) + +print info_json +``` + + {"name": "echo", "age": 24, "married": false, "ages for school": {"middle school": 9, "university": 18, "high school": 15, "primary school": 6}, "coding skills": ["python", "matlab", "java", "c", "c++", "ruby", "scala"], "hobby": ["sports", "reading"]} + + +从中我们可以看到,生成的 `JSON` 字符串中,数组的元素顺序是不变的(始终是 `["python", "matlab", "java", "c", "c++", "ruby", "scala"]`),而对象的元素顺序是不确定的。 + +## 生成和读取 JSON 文件 + +与 `pickle` 类似,我们可以直接从文件中读取 `JSON` 数据,也可以将对象保存为 `JSON` 格式。 + +- `json.dump(obj, file)` 将对象保存为 JSON 格式的文件 +- `json.load(file)` 从 JSON 文件中读取数据 + + +```python +with open("info.json", "w") as f: + json.dump(info, f) +``` + +可以查看 `info.json` 的内容: + + +```python +with open("info.json") as f: + print f.read() +``` + + {"name": "echo",
"age": 24, "married": false, "ages for school": {"middle school": 9, "university": 18, "high school": 15, "primary school": 6}, "coding skills": ["python", "matlab", "java", "c", "c++", "ruby", "scala"], "hobby": ["sports", "reading"]} + + +从文件中读取数据: + + +```python +with open("info.json") as f: + info_from_file = json.load(f) + +pprint(info_from_file) +``` + + {u'age': 24, + u'ages for school': {u'high school': 15, + u'middle school': 9, + u'primary school': 6, + u'university': 18}, + u'coding skills': [u'python', + u'matlab', + u'java', + u'c', + u'c++', + u'ruby', + u'scala'], + u'hobby': [u'sports', u'reading'], + u'married': False, + u'name': u'echo'} + + +删除生成的文件: + + +```python +import os +os.remove("info.json") +``` diff --git a/docs/11-useful-tools/11.04-glob.md b/docs/11-useful-tools/11.04-glob.md new file mode 100644 index 00000000..a6710949 --- /dev/null +++ b/docs/11-useful-tools/11.04-glob.md @@ -0,0 +1,84 @@ + +# glob 模块:文件模式匹配 + + +```python +import glob +``` + +`glob` 模块提供了方便的文件模式匹配方法。 + +例如,找到所有以 `.ipynb` 结尾的文件名: + + +```python +glob.glob("*.ipynb") +``` + + + + + ['11.03 json.ipynb', + '11.01 pprint.ipynb', + '11.02 pickle and cpickle.ipynb', + '11.04 glob.ipynb'] + + + +`glob` 函数支持三种格式的语法: + +- `*` 匹配任意多个字符(包括零个) +- `?` 匹配任意单个字符 +- `[]` 匹配指定范围内的字符,如:[0-9]匹配数字。 + +假设我们要匹配第 09 节所有的 `.ipynb` 文件: + + +```python +glob.glob("../09*/*.ipynb") +``` + + + + + ['../09. theano/09.05 configuration settings and compiling modes.ipynb', + '../09. theano/09.03 gpu on windows.ipynb', + '../09. theano/09.07 loop with scan.ipynb', + '../09. theano/09.13 modern net on mnist.ipynb', + '../09. theano/09.11 net on mnist.ipynb', + '../09. theano/09.09 logistic regression .ipynb', + '../09. theano/09.10 softmax on mnist.ipynb', + '../09. theano/09.01 introduction and installation.ipynb', + '../09. theano/09.02 theano basics.ipynb', + '../09. theano/09.12 random streams.ipynb', + '../09. theano/09.04 graph structures.ipynb', + '../09.
theano/09.14 convolutional net on mnist.ipynb', + '../09. theano/09.08 linear regression.ipynb', + '../09. theano/09.15 tensor module.ipynb', + '../09. theano/09.06 conditions in theano.ipynb'] + + + +匹配数字开头的文件夹名: + + +```python +glob.glob("../[0-9]*") +``` + + + + + ['../04. scipy', + '../02. python essentials', + '../07. interfacing with other languages', + '../11. useful tools', + '../05. advanced python', + '../10. something interesting', + '../03. numpy', + '../06. matplotlib', + '../08. object-oriented programming', + '../01. python tools', + '../09. theano'] + + diff --git a/docs/11-useful-tools/11.05-shutil.md b/docs/11-useful-tools/11.05-shutil.md new file mode 100644 index 00000000..806cc782 --- /dev/null +++ b/docs/11-useful-tools/11.05-shutil.md @@ -0,0 +1,147 @@ + +# shutil 模块:高级文件操作 + + +```python +import shutil +import os +``` + +`shutil` 是 `Python` 中的高级文件操作模块。 + +## 复制文件 + + +```python +with open("test.file", "w") as f: + pass + +print "test.file" in os.listdir(os.curdir) +``` + + True + + +`shutil.copy(src, dst)` 将源文件复制到目标地址: + + +```python +shutil.copy("test.file", "test.copy.file") + +print "test.file" in os.listdir(os.curdir) +print "test.copy.file" in os.listdir(os.curdir) +``` + + True + True + + +如果目标地址中间的文件夹不存在则会报错: + + +```python +try: + shutil.copy("test.file", "my_test_dir/test.copy.file") +except IOError as msg: + print msg +``` + + [Errno 2] No such file or directory: 'my_test_dir/test.copy.file' + + +另外的一个函数 `shutil.copyfile(src, dst)` 与 `shutil.copy` 使用方法一致,不过只是简单复制文件的内容,并不会复制文件本身的读写可执行权限,而 `shutil.copy` 则是完全复制。 + +## 复制文件夹 + +将文件转移到 `test_dir` 文件夹: + + +```python +os.renames("test.file", "test_dir/test.file") +os.renames("test.copy.file", "test_dir/test.copy.file") +``` + +使用 `shutil.copytree` 来复制文件夹: + + +```python +shutil.copytree("test_dir/", "test_dir_copy/") + +"test_dir_copy" in os.listdir(os.curdir) +``` + + + + + True + + + +## 删除非空文件夹 + +`os.removedirs` 不能删除非空文件夹: + + +```python +try: + os.removedirs("test_dir_copy") +except 
Exception as msg: + print msg +``` + + [Errno 39] Directory not empty: 'test_dir_copy' + + +使用 `shutil.rmtree` 来删除非空文件夹: + + +```python +shutil.rmtree("test_dir_copy") +``` + +## 移动文件夹 + +`shutil.move` 可以整体移动文件夹,与 `os.rename` 功能差不多。 + +## 产生压缩文件 + +查看支持的压缩文件格式: + + +```python +shutil.get_archive_formats() +``` + + + + + [('bztar', "bzip2'ed tar-file"), + ('gztar', "gzip'ed tar-file"), + ('tar', 'uncompressed tar file'), + ('zip', 'ZIP file')] + + + +产生压缩文件: + +`shutil.make_archive(basename, format, root_dir)` + + +```python +shutil.make_archive("test_archive", "zip", "test_dir/") +``` + + + + + '/home/lijin/notes-python/11. useful tools/test_archive.zip' + + + +清理生成的文件和文件夹: + + +```python +os.remove("test_archive.zip") +shutil.rmtree("test_dir/") +``` diff --git a/docs/11-useful-tools/11.06-gzip,-zipfile,-tarfile.md b/docs/11-useful-tools/11.06-gzip,-zipfile,-tarfile.md new file mode 100644 index 00000000..8257f44d Binary files /dev/null and b/docs/11-useful-tools/11.06-gzip,-zipfile,-tarfile.md differ diff --git a/docs/11-useful-tools/11.07-logging.md b/docs/11-useful-tools/11.07-logging.md new file mode 100644 index 00000000..d77479d4 --- /dev/null +++ b/docs/11-useful-tools/11.07-logging.md @@ -0,0 +1,63 @@ + +# logging 模块:记录日志 + +`logging` 模块可以用来记录日志: + + +```python +import logging +``` + +`logging` 的日志类型有以下几种: + +- `logging.critical(msg)` +- `logging.error(msg)` +- `logging.warning(msg)` +- `logging.info(msg)` +- `logging.debug(msg)` + +级别排序为:`CRITICAL > ERROR > WARNING > INFO > DEBUG > NOTSET` + +默认情况下,`logging` 的日志级别为 `WARNING`,只有不低于 `WARNING` 级别的日志才会显示在命令行。 + + +```python +logging.critical('This is critical message') +logging.error('This is error message') +logging.warning('This is warning message') + +# 不会显示 +logging.info('This is info message') +logging.debug('This is debug message') +``` + + CRITICAL:root:This is critical message + ERROR:root:This is error message + WARNING:root:This is warning message + + +可以这样修改默认的日志级别: + + +```python 
+logging.root.setLevel(level=logging.INFO) + +logging.info('This is info message') +``` + + INFO:root:This is info message + + +可以通过 `logging.basicConfig()` 函数来改变默认的日志显示方式: + + +```python +logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s') + +logger = logging.getLogger("this program") + +logger.critical('This is critical message') +``` + + CRITICAL:this program:This is critical message + diff --git a/docs/11-useful-tools/11.08-string.md b/docs/11-useful-tools/11.08-string.md new file mode 100644 index 00000000..7a2188d4 --- /dev/null +++ b/docs/11-useful-tools/11.08-string.md @@ -0,0 +1,114 @@ + +# string 模块:字符串处理 + + +```python +import string +``` + +标点符号: + + +```python +string.punctuation +``` + + + + + '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~' + + + +字母表: + + +```python +print string.letters +print string.ascii_letters +``` + + ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz + abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ + + +小写和大写: + + +```python +print string.ascii_lowercase +print string.lowercase + +print string.ascii_uppercase +print string.uppercase +``` + + abcdefghijklmnopqrstuvwxyz + abcdefghijklmnopqrstuvwxyz + ABCDEFGHIJKLMNOPQRSTUVWXYZ + ABCDEFGHIJKLMNOPQRSTUVWXYZ + + + +```python +print string.lower +``` + + + + +数字: + + +```python +string.digits +``` + + + + + '0123456789' + + + +16 进制数字: + + +```python +string.hexdigits +``` + + + + + '0123456789abcdefABCDEF' + + + +每个单词的首字符大写: + + +```python +string.capwords("this is a big world") +``` + + + + + 'This Is A Big World' + + + +将指定的单词放到中央: + + +```python +string.center("test", 20) +``` + + + + + ' test ' + + diff --git a/docs/11-useful-tools/11.09-collections.md b/docs/11-useful-tools/11.09-collections.md new file mode 100644 index 00000000..9818131c --- /dev/null +++ b/docs/11-useful-tools/11.09-collections.md @@ -0,0 +1,133 @@ + +# collections 模块:更多数据结构 + + +```python +import collections +``` + +## 计数器 + +可以使用 `Counter(seq)` 对序列中出现的元素个数进行统计。 + 
+例如,我们可以统计一段文本中出现的单词及其出现的次数: + + +```python +from string import punctuation + +sentence = "One, two, three, one, two, tree, I come from China." + +words_count = collections.Counter(sentence.translate(None, punctuation).lower().split()) + +print words_count +``` + + Counter({'two': 2, 'one': 2, 'from': 1, 'i': 1, 'tree': 1, 'three': 1, 'china': 1, 'come': 1}) + + +## 双端队列 + +双端队列支持从队头队尾出入队: + + +```python +dq = collections.deque() + +for i in xrange(10): + dq.append(i) + +print dq + +for i in xrange(10): + print dq.pop(), + +print + +for i in xrange(10): + dq.appendleft(i) + +print dq + +for i in xrange(10): + print dq.popleft(), +``` + + deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + 9 8 7 6 5 4 3 2 1 0 + deque([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) + 9 8 7 6 5 4 3 2 1 0 + + +与列表相比,双端队列在队头的操作更快: + + +```python +lst = [] +dq = collections.deque() + +%timeit -n100 lst.insert(0, 10) +%timeit -n100 dq.appendleft(10) +``` + + 100 loops, best of 3: 598 ns per loop + 100 loops, best of 3: 291 ns per loop + + +## 有序字典 + +字典的 `key` 按顺序排列: + + +```python +items = ( + ('A', 1), + ('B', 2), + ('C', 3) +) + +regular_dict = dict(items) +ordered_dict = collections.OrderedDict(items) + +print 'Regular Dict:' +for k, v in regular_dict.items(): + print k, v + +print 'Ordered Dict:' +for k, v in ordered_dict.items(): + print k, v +``` + + Regular Dict: + A 1 + C 3 + B 2 + Ordered Dict: + A 1 + B 2 + C 3 + + +## 带默认值的字典 + +对于 `Python` 自带的词典 `d`,当 `key` 不存在的时候,调用 `d[key]` 会报错,但是 `defaultdict` 可以为这样的 `key` 提供一个指定的默认值,我们只需要在定义时提供默认值的类型即可,如果 `key` 不存在返回指定类型的默认值: + + +```python +dd = collections.defaultdict(list) + +print dd["foo"] + +dd = collections.defaultdict(int) + +print dd["foo"] + +dd = collections.defaultdict(float) + +print dd["foo"] +``` + + [] + 0 + 0.0 + diff --git a/docs/11-useful-tools/11.10-requests.md b/docs/11-useful-tools/11.10-requests.md new file mode 100644 index 00000000..36613964 --- /dev/null +++ b/docs/11-useful-tools/11.10-requests.md @@ -0,0 +1,132 @@ + +# requests 模块:HTTP 
for Humans + + +```python +import requests +``` + +Python 标准库中的 `urllib2` 模块提供了你所需要的大多数 `HTTP` 功能,但是它的 `API` 不是特别方便使用。 + +`requests` 模块号称 `HTTP for Humans`,它可以这样使用: + + +```python +r = requests.get("http://httpbin.org/get") +r = requests.post('http://httpbin.org/post', data = {'key':'value'}) +r = requests.put("http://httpbin.org/put") +r = requests.delete("http://httpbin.org/delete") +r = requests.head("http://httpbin.org/get") +r = requests.options("http://httpbin.org/get") +``` + +## 传入 URL 参数 + +假如我们想访问 `httpbin.org/get?key=val`,我们可以使用 `params` 传入这些参数: + + +```python +payload = {'key1': 'value1', 'key2': 'value2'} +r = requests.get("http://httpbin.org/get", params=payload) +``` + +查看 `url` : + + +```python +print(r.url) +``` + + http://httpbin.org/get?key2=value2&key1=value1 + + +## 读取响应内容 + +`Requests` 会自动解码来自服务器的内容。大多数 `unicode` 字符集都能被无缝地解码。 + + +```python +r = requests.get('https://github.com/timeline.json') + +print r.text +``` + + {"message":"Hello there, wayfaring stranger. If you’re reading this then you probably didn’t see our blog post a couple of years back announcing that this API would go away: http://git.io/17AROg Fear not, you should be able to get what you need from the shiny new Events API instead.","documentation_url":"https://developer.github.com/v3/activity/events/#list-public-events"} + + +查看文字编码: + + +```python +r.encoding +``` + + + + + 'utf-8' + + + +每次改变文字编码,`text` 的内容也随之变化: + + +```python +r.encoding = "ISO-8859-1" + +r.text +``` + + + + + u'{"message":"Hello there, wayfaring stranger.
If you\xe2\x80\x99re reading this then you probably didn\xe2\x80\x99t see our blog post a couple of years back announcing that this API would go away: http://git.io/17AROg Fear not, you should be able to get what you need from the shiny new Events API instead.","documentation_url":"https://developer.github.com/v3/activity/events/#list-public-events"}' + + + +`Requests` 中也有一个内置的 `JSON` 解码器处理 `JSON` 数据: + + +```python +r.json() +``` + + + + + {u'documentation_url': u'https://developer.github.com/v3/activity/events/#list-public-events', + u'message': u'Hello there, wayfaring stranger. If you\xe2\x80\x99re reading this then you probably didn\xe2\x80\x99t see our blog post a couple of years back announcing that this API would go away: http://git.io/17AROg Fear not, you should be able to get what you need from the shiny new Events API instead.'} + + + +如果 `JSON` 解码失败, `r.json` 就会抛出一个异常。 + +## 响应状态码 + + +```python +r = requests.get('http://httpbin.org/get') + +r.status_code +``` + + + + + 407 + + + +## 响应头 + + +```python +r.headers['Content-Type'] +``` + + + + + 'text/html' + + diff --git a/docs/12-pandas/12.01-ten-minutes-to-pandas.md b/docs/12-pandas/12.01-ten-minutes-to-pandas.md new file mode 100644 index 00000000..7432b5c4 --- /dev/null +++ b/docs/12-pandas/12.01-ten-minutes-to-pandas.md @@ -0,0 +1,3836 @@ + +# 十分钟上手 Pandas + +`pandas` 是一个 `Python Data Analysis Library`。 + +安装请参考官网的教程,如果安装了 `Anaconda`,则不需要安装 `pandas` 库。 + + +```python +%matplotlib inline + +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +``` + +## 产生 Pandas 对象 + +`pandas` 中有三种基本结构: + +- `Series` + - 1D labeled homogeneously-typed array +- `DataFrame` + - General 2D labeled, size-mutable tabular structure with potentially heterogeneously-typed columns +- `Panel` + - General 3D labeled, also size-mutable array + +### Series + +一维 `Series` 可以用一维列表初始化: + + +```python +s = pd.Series([1,3,5,np.nan,6,8]) + +print s +``` + + 0 1 + 1 3 + 2 5 + 3 NaN + 4 6 + 5 8 + dtype: float64 + + 
+默认情况下,`Series` 的下标都是数字(可以使用额外参数指定),类型是统一的。 + +### DataFrame + +`DataFrame` 则是个二维结构,这里首先构造一组时间序列,作为我们第一维的下标: + + +```python +dates = pd.date_range('20130101', periods=6) + +print dates +``` + + DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], + dtype='datetime64[ns]', freq='D') + + +然后创建一个 `DataFrame` 结构: + + +```python +df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD')) + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
2013-01-01-0.605936-0.861658-1.0019241.528584
2013-01-02-0.1654080.3883381.1871871.819818
2013-01-030.065255-1.608074-1.282331-0.286067
2013-01-041.2893050.497115-0.2253510.040239
2013-01-050.0382320.875057-0.0925260.934432
2013-01-06-2.163453-0.0102791.6998861.291653
+
+ + + +默认情况下,如果不指定 `index` 参数和 `columns`,那么他们的值将用从 `0` 开始的数字替代。 + +除了向 `DataFrame` 中传入二维数组,我们也可以使用字典传入数据: + + +```python +df2 = pd.DataFrame({'A' : 1., + 'B' : pd.Timestamp('20130102'), + 'C' : pd.Series(1,index=list(range(4)),dtype='float32'), + 'D' : np.array([3] * 4,dtype='int32'), + 'E' : pd.Categorical(["test","train","test","train"]), + 'F' : 'foo' }) + +df2 +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDEF
012013-01-0213testfoo
112013-01-0213trainfoo
212013-01-0213testfoo
312013-01-0213trainfoo
+
+ + + +字典的每个 `key` 代表一列,其 `value` 可以是各种能够转化为 `Series` 的对象。 + +与 `Series` 要求所有的类型都一致不同,`DataFrame` 只要求每一列数据的格式相同: + + +```python +df2.dtypes +``` + + + + + A float64 + B datetime64[ns] + C float32 + D int32 + E category + F object + dtype: object + + + +## 查看数据 + +### 头尾数据 + +`head` 和 `tail` 方法可以分别查看最前面几行和最后面几行的数据(默认为 5): + + +```python +df.head() +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
2013-01-01-0.605936-0.861658-1.0019241.528584
2013-01-02-0.1654080.3883381.1871871.819818
2013-01-030.065255-1.608074-1.282331-0.286067
2013-01-041.2893050.497115-0.2253510.040239
2013-01-050.0382320.875057-0.0925260.934432
+
+ + + +最后 3 行: + + +```python +df.tail(3) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
2013-01-041.2893050.497115-0.2253510.040239
2013-01-050.0382320.875057-0.0925260.934432
2013-01-06-2.163453-0.0102791.6998861.291653
+
+ + + +### 下标,列标,数据 + +下标使用 `index` 属性查看: + + +```python +df.index +``` + + + + + DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], + dtype='datetime64[ns]', freq='D') + + + +列标使用 `columns` 属性查看: + + +```python +df.columns +``` + + + + + Index([u'A', u'B', u'C', u'D'], dtype='object') + + + +数据值使用 `values` 查看: + + +```python +df.values +``` + + + + + array([[-0.60593585, -0.86165752, -1.00192387, 1.52858443], + [-0.16540784, 0.38833783, 1.18718697, 1.81981793], + [ 0.06525454, -1.60807414, -1.2823306 , -0.28606716], + [ 1.28930486, 0.49711531, -0.22535143, 0.04023897], + [ 0.03823179, 0.87505664, -0.0925258 , 0.93443212], + [-2.16345271, -0.01027865, 1.69988608, 1.29165337]]) + + + +### 统计数据 + +查看简单的统计数据: + + +```python +df.describe() +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
count6.0000006.0000006.0000006.000000
mean-0.257001-0.1199170.0474900.888110
std1.1266570.9387051.1826290.841529
min-2.163453-1.608074-1.282331-0.286067
25%-0.495804-0.648813-0.8077810.263787
50%-0.0635880.189030-0.1589391.113043
75%0.0584990.4699210.8672591.469352
max1.2893050.8750571.6998861.819818
+
+ + + +### 转置 + + +```python +df.T +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
2013-01-01 00:00:002013-01-02 00:00:002013-01-03 00:00:002013-01-04 00:00:002013-01-05 00:00:002013-01-06 00:00:00
A-0.605936-0.1654080.0652551.2893050.038232-2.163453
B-0.8616580.388338-1.6080740.4971150.875057-0.010279
C-1.0019241.187187-1.282331-0.225351-0.0925261.699886
D1.5285841.819818-0.2860670.0402390.9344321.291653
+
+ + + +## 排序 + +`sort_index(axis=0, ascending=True)` 方法按照下标大小进行排序,`axis=0` 表示按第 0 维进行排序。 + + +```python +df.sort_index(ascending=False) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
2013-01-06-2.163453-0.0102791.6998861.291653
2013-01-050.0382320.875057-0.0925260.934432
2013-01-041.2893050.497115-0.2253510.040239
2013-01-030.065255-1.608074-1.282331-0.286067
2013-01-02-0.1654080.3883381.1871871.819818
2013-01-01-0.605936-0.861658-1.0019241.528584
+
+ + + + +```python +df.sort_index(axis=1, ascending=False) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DCBA
2013-01-011.528584-1.001924-0.861658-0.605936
2013-01-021.8198181.1871870.388338-0.165408
2013-01-03-0.286067-1.282331-1.6080740.065255
2013-01-040.040239-0.2253510.4971151.289305
2013-01-050.934432-0.0925260.8750570.038232
2013-01-061.2916531.699886-0.010279-2.163453
+
+ + + +`sort_values(by, axis=0, ascending=True)` 方法按照 `by` 的值的大小进行排序,例如按照 `B` 列的大小: + + +```python +df.sort_values(by="B") +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
2013-01-030.065255-1.608074-1.282331-0.286067
2013-01-01-0.605936-0.861658-1.0019241.528584
2013-01-06-2.163453-0.0102791.6998861.291653
2013-01-02-0.1654080.3883381.1871871.819818
2013-01-041.2893050.497115-0.2253510.040239
2013-01-050.0382320.875057-0.0925260.934432
+
+ + + +## 索引 + +虽然 `DataFrame` 支持 `Python/Numpy` 的索引语法,但是推荐使用 `.at, .iat, .loc, .iloc 和 .ix` 方法进行索引。 + +### 读取数据 + +选择单列数据: + + +```python +df["A"] +``` + + + + + 2013-01-01 -0.605936 + 2013-01-02 -0.165408 + 2013-01-03 0.065255 + 2013-01-04 1.289305 + 2013-01-05 0.038232 + 2013-01-06 -2.163453 + Freq: D, Name: A, dtype: float64 + + + +也可以用 `df.A`: + + +```python +df.A +``` + + + + + 2013-01-01 -0.605936 + 2013-01-02 -0.165408 + 2013-01-03 0.065255 + 2013-01-04 1.289305 + 2013-01-05 0.038232 + 2013-01-06 -2.163453 + Freq: D, Name: A, dtype: float64 + + + +使用切片读取多行: + + +```python +df[0:3] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
2013-01-01-0.605936-0.861658-1.0019241.528584
2013-01-02-0.1654080.3883381.1871871.819818
2013-01-030.065255-1.608074-1.282331-0.286067
+
+ + + +`index` 名字也可以进行切片: + + +```python +df["20130101":"20130103"] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
2013-01-01-0.605936-0.861658-1.0019241.528584
2013-01-02-0.1654080.3883381.1871871.819818
2013-01-030.065255-1.608074-1.282331-0.286067
+
+ + + +### 使用 `label` 索引 + +`loc` 可以方便的使用 `label` 进行索引: + + +```python +df.loc[dates[0]] +``` + + + + + A -0.605936 + B -0.861658 + C -1.001924 + D 1.528584 + Name: 2013-01-01 00:00:00, dtype: float64 + + + +多列数据: + + +```python +df.loc[:,['A','B']] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AB
2013-01-01-0.605936-0.861658
2013-01-02-0.1654080.388338
2013-01-030.065255-1.608074
2013-01-041.2893050.497115
2013-01-050.0382320.875057
2013-01-06-2.163453-0.010279
+
+ + + +选择多行多列: + + +```python +df.loc['20130102':'20130104',['A','B']] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
AB
2013-01-02-0.1654080.388338
2013-01-030.065255-1.608074
2013-01-041.2893050.497115
+
+ + + +数据降维: + + +```python +df.loc['20130102',['A','B']] +``` + + + + + A -0.165408 + B 0.388338 + Name: 2013-01-02 00:00:00, dtype: float64 + + + +得到标量值: + + +```python +df.loc[dates[0],'B'] +``` + + + + + -0.86165751902832299 + + + +不过得到标量值可以用 `at`,速度更快: + + +```python +%timeit -n100 df.loc[dates[0],'B'] +%timeit -n100 df.at[dates[0],'B'] + +print df.at[dates[0],'B'] +``` + + 100 loops, best of 3: 329 µs per loop + 100 loops, best of 3: 31.1 µs per loop + -0.861657519028 + + +### 使用位置索引 + +`iloc` 使用位置进行索引: + + +```python +df.iloc[3] +``` + + + + + A 1.289305 + B 0.497115 + C -0.225351 + D 0.040239 + Name: 2013-01-04 00:00:00, dtype: float64 + + + +连续切片: + + +```python +df.iloc[3:5,0:2] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + +
AB
2013-01-041.2893050.497115
2013-01-050.0382320.875057
+
+ + + +索引不连续的部分: + + +```python +df.iloc[[1,2,4],[0,2]] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
AC
2013-01-02-0.1654081.187187
2013-01-030.065255-1.282331
2013-01-050.038232-0.092526
+
+ + + +索引整行: + + +```python +df.iloc[1:3,:] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
2013-01-02-0.1654080.3883381.1871871.819818
2013-01-030.065255-1.608074-1.282331-0.286067
+
+ + + +整列: + + +```python +df.iloc[:, 1:3] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
BC
2013-01-01-0.861658-1.001924
2013-01-020.3883381.187187
2013-01-03-1.608074-1.282331
2013-01-040.497115-0.225351
2013-01-050.875057-0.092526
2013-01-06-0.0102791.699886
+
+ + + +标量值: + + +```python +df.iloc[1,1] +``` + + + + + 0.3883378290420279 + + + +当然,使用 `iat` 索引标量值更快: + + +```python +%timeit -n100 df.iloc[1,1] +%timeit -n100 df.iat[1,1] + +df.iat[1,1] +``` + + 100 loops, best of 3: 236 µs per loop + 100 loops, best of 3: 14.5 µs per loop + + + + + + 0.3883378290420279 + + + +### 布尔型索引 + +所有 `A` 列大于 0 的行: + + +```python +df[df.A > 0] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
2013-01-030.065255-1.608074-1.282331-0.286067
2013-01-041.2893050.497115-0.2253510.040239
2013-01-050.0382320.875057-0.0925260.934432
+
+ + + +只留下所有大于 0 的数值: + + +```python +df[df > 0] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
2013-01-01NaNNaNNaN1.528584
2013-01-02NaN0.3883381.1871871.819818
2013-01-030.065255NaNNaNNaN
2013-01-041.2893050.497115NaN0.040239
2013-01-050.0382320.875057NaN0.934432
2013-01-06NaNNaN1.6998861.291653
+
+ + + +使用 `isin` 方法做 `filter` 过滤: + + +```python +df2 = df.copy() +df2['E'] = ['one', 'one','two','three','four','three'] + +df2 +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDE
2013-01-01-0.605936-0.861658-1.0019241.528584one
2013-01-02-0.1654080.3883381.1871871.819818one
2013-01-030.065255-1.608074-1.282331-0.286067two
2013-01-041.2893050.497115-0.2253510.040239three
2013-01-050.0382320.875057-0.0925260.934432four
2013-01-06-2.163453-0.0102791.6998861.291653three
+
+ + + + +```python +df2[df2['E'].isin(['two','four'])] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDE
2013-01-030.065255-1.608074-1.282331-0.286067two
2013-01-050.0382320.875057-0.0925260.934432four
+
+ + + +### 设定数据的值 + + +```python +s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6)) + +s1 +``` + + + + + 2013-01-02 1 + 2013-01-03 2 + 2013-01-04 3 + 2013-01-05 4 + 2013-01-06 5 + 2013-01-07 6 + Freq: D, dtype: int64 + + + +像字典一样,直接指定 `F` 列的值为 `s1`,此时以 `df` 已有的 `index` 为标准将二者进行合并,`s1` 中没有的 `index` 项设为 `NaN`,多余的项舍去: + + +```python +df['F'] = s1 + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDF
2013-01-01-0.605936-0.861658-1.0019241.528584NaN
2013-01-02-0.1654080.3883381.1871871.8198181
2013-01-030.065255-1.608074-1.282331-0.2860672
2013-01-041.2893050.497115-0.2253510.0402393
2013-01-050.0382320.875057-0.0925260.9344324
2013-01-06-2.163453-0.0102791.6998861.2916535
+
+ + + +或者使用 `at` 或 `iat` 修改单个值: + + +```python +df.at[dates[0],'A'] = 0 + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDF
2013-01-010.000000-0.861658-1.0019241.528584NaN
2013-01-02-0.1654080.3883381.1871871.8198181
2013-01-030.065255-1.608074-1.282331-0.2860672
2013-01-041.2893050.497115-0.2253510.0402393
2013-01-050.0382320.875057-0.0925260.9344324
2013-01-06-2.163453-0.0102791.6998861.2916535
+
+ + + + +```python +df.iat[0, 1] = 0 + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDF
2013-01-010.0000000.000000-1.0019241.528584NaN
2013-01-02-0.1654080.3883381.1871871.8198181
2013-01-030.065255-1.608074-1.282331-0.2860672
2013-01-041.2893050.497115-0.2253510.0402393
2013-01-050.0382320.875057-0.0925260.9344324
2013-01-06-2.163453-0.0102791.6998861.2916535
+
+ + + +设定一整列: + + +```python +df.loc[:,'D'] = np.array([5] * len(df)) + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDF
2013-01-010.0000000.000000-1.0019245NaN
2013-01-02-0.1654080.3883381.18718751
2013-01-030.065255-1.608074-1.28233152
2013-01-041.2893050.497115-0.22535153
2013-01-050.0382320.875057-0.09252654
2013-01-06-2.163453-0.0102791.69988655
+
+ + + +设定满足条件的数值: + + +```python +df2 = df.copy() + +df2[df2 > 0] = -df2 + +df2 +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDF
2013-01-010.0000000.000000-1.001924-5NaN
2013-01-02-0.165408-0.388338-1.187187-5-1
2013-01-03-0.065255-1.608074-1.282331-5-2
2013-01-04-1.289305-0.497115-0.225351-5-3
2013-01-05-0.038232-0.875057-0.092526-5-4
2013-01-06-2.163453-0.010279-1.699886-5-5
+
+ + + +## 缺失数据 + + +```python +df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E']) +df1.loc[dates[0]:dates[1],'E'] = 1 + +df1 +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDFE
2013-01-010.0000000.000000-1.0019245NaN1
2013-01-02-0.1654080.3883381.187187511
2013-01-030.065255-1.608074-1.28233152NaN
2013-01-041.2893050.497115-0.22535153NaN
+
+ + + +丢弃所有缺失数据的行得到的新数据: + + +```python +df1.dropna(how='any') +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + +
ABCDFE
2013-01-02-0.1654080.3883381.187187511
+
+ + + +填充缺失数据: + + +```python +df1.fillna(value=5) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDFE
2013-01-010.0000000.000000-1.001924551
2013-01-02-0.1654080.3883381.187187511
2013-01-030.065255-1.608074-1.282331525
2013-01-041.2893050.497115-0.225351535
+
+ + + +检查缺失数据的位置: + + +```python +pd.isnull(df1) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDFE
2013-01-01FalseFalseFalseFalseTrueFalse
2013-01-02FalseFalseFalseFalseFalseFalse
2013-01-03FalseFalseFalseFalseFalseTrue
2013-01-04FalseFalseFalseFalseFalseTrue
+
+ + + +## 计算操作 + +### 统计信息 + +每一列的均值: + + +```python +df.mean() +``` + + + + + A -0.156012 + B 0.023693 + C 0.047490 + D 5.000000 + F 3.000000 + dtype: float64 + + + +每一行的均值: + + +```python +df.mean(1) +``` + + + + + 2013-01-01 0.999519 + 2013-01-02 1.482023 + 2013-01-03 0.834970 + 2013-01-04 1.912214 + 2013-01-05 1.964153 + 2013-01-06 1.905231 + Freq: D, dtype: float64 + + + +多个对象之间的操作,如果维度不对,`pandas` 会自动调用 `broadcasting` 机制: + + +```python +s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2) + +print s +``` + + 2013-01-01 NaN + 2013-01-02 NaN + 2013-01-03 1 + 2013-01-04 3 + 2013-01-05 5 + 2013-01-06 NaN + Freq: D, dtype: float64 + + +相减 `df - s`: + + +```python +df.sub(s, axis='index') +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDF
2013-01-01NaNNaNNaNNaNNaN
2013-01-02NaNNaNNaNNaNNaN
2013-01-03-0.934745-2.608074-2.28233141
2013-01-04-1.710695-2.502885-3.22535120
2013-01-05-4.961768-4.124943-5.0925260-1
2013-01-06NaNNaNNaNNaNNaN
+
+ + + +### apply 操作 + +与 `R` 中的 `apply` 操作类似,接收一个函数,默认是将函数作用到每一列上: + + +```python +df.apply(np.cumsum) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCDF
2013-01-010.0000000.000000-1.0019245NaN
2013-01-02-0.1654080.3883380.185263101
2013-01-03-0.100153-1.219736-1.097067153
2013-01-041.189152-0.722621-1.322419206
2013-01-051.2273830.152436-1.4149452510
2013-01-06-0.9360690.1421570.2849413015
+
+ + + +求每列最大最小值之差: + + +```python +df.apply(lambda x: x.max() - x.min()) +``` + + + + + A 3.452758 + B 2.483131 + C 2.982217 + D 0.000000 + F 4.000000 + dtype: float64 + + + +### 直方图 + + +```python +s = pd.Series(np.random.randint(0, 7, size=10)) +print s +``` + + 0 2 + 1 5 + 2 6 + 3 6 + 4 6 + 5 3 + 6 5 + 7 0 + 8 4 + 9 4 + dtype: int64 + + +直方图信息: + + +```python +print s.value_counts() +``` + + 6 3 + 5 2 + 4 2 + 3 1 + 2 1 + 0 1 + dtype: int64 + + +绘制直方图信息: + + +```python +h = s.hist() +``` + + +![png](output_126_0.png) + + +### 字符串方法 + +当 `Series` 或者 `DataFrame` 的某一列是字符串时,我们可以用 `.str` 对这个字符串数组进行字符串的基本操作: + + +```python +s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat']) + +print s.str.lower() +``` + + 0 a + 1 b + 2 c + 3 aaba + 4 baca + 5 NaN + 6 caba + 7 dog + 8 cat + dtype: object + + +## 合并 + +### 连接 + + +```python +df = pd.DataFrame(np.random.randn(10, 4)) + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
0123
0-2.3463730.105651-0.0480270.010637
1-0.6821980.9430430.147312-0.657871
20.515766-0.7682860.3615701.146278
3-0.607277-0.003086-1.4990011.165728
4-1.226279-0.177246-1.379631-0.639261
50.807364-1.8550600.3259681.898831
60.438539-0.728131-0.0099240.398360
71.497457-1.506314-1.5576240.869043
80.945985-0.519435-0.510359-1.077751
91.597679-0.285955-1.0607360.608629
+
+ + + +可以使用 `pd.concat` 函数将多个 `pandas` 对象进行连接: + + +```python +pieces = [df[:2], df[4:5], df[7:]] + +pd.concat(pieces) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
0123
0-2.3463730.105651-0.0480270.010637
1-0.6821980.9430430.147312-0.657871
4-1.226279-0.177246-1.379631-0.639261
71.497457-1.506314-1.5576240.869043
80.945985-0.519435-0.510359-1.077751
91.597679-0.285955-1.0607360.608629
+
+ + + +### 数据库中的 Join + +`merge` 可以实现数据库中的 `join` 操作: + + +```python +left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]}) +right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]}) + +print left +print right +``` + + key lval + 0 foo 1 + 1 foo 2 + key rval + 0 foo 4 + 1 foo 5 + + + +```python +pd.merge(left, right, on='key') +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
keylvalrval
0foo14
1foo15
2foo24
3foo25
+
+ + + +### append + +向 `DataFrame` 中添加行: + + +```python +df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D']) + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
01.587778-0.1102970.6022451.212597
1-0.5511090.337387-0.2209190.363332
21.207373-0.1283940.619937-0.612694
3-0.978282-1.0381700.048995-0.788973
40.843893-1.0790210.0922120.485422
5-0.0565941.8312061.910864-1.331739
6-0.487106-1.4953670.8534400.410854
71.830852-0.0148930.2540250.197422
+
+ + + +将下标为 3 的行(即第四行)的值添加到最后: + + +```python +s = df.iloc[3] + +df.append(s, ignore_index=True) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
01.587778-0.1102970.6022451.212597
1-0.5511090.337387-0.2209190.363332
21.207373-0.1283940.619937-0.612694
3-0.978282-1.0381700.048995-0.788973
40.843893-1.0790210.0922120.485422
5-0.0565941.8312061.910864-1.331739
6-0.487106-1.4953670.8534400.410854
71.830852-0.0148930.2540250.197422
8-0.978282-1.0381700.048995-0.788973
+
+ + + +### Grouping + + +```python +df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B' : ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C' : np.random.randn(8), + 'D' : np.random.randn(8)}) + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
0fooone0.7730620.206503
1barone1.414609-0.346719
2footwo0.9641740.706623
3barthree0.182239-1.516509
4footwo-0.0962550.494177
5bartwo-0.759471-0.389213
6fooone-0.257519-1.411693
7foothree-0.1093680.241862
+
+ + + +按照 `A` 的值进行分类: + + +```python +df.groupby('A').sum() +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
CD
A
bar0.837377-2.252441
foo1.2740940.237472
+
+ + + +按照 `A, B` 的值进行分类: + + +```python +df.groupby(['A', 'B']).sum() +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CD
AB
barone1.414609-0.346719
three0.182239-1.516509
two-0.759471-0.389213
fooone0.515543-1.205191
three-0.1093680.241862
two0.8679191.200800
+
+ + + +## 改变形状 + +### Stack + +产生一个多 `index` 的 `DataFrame`: + + +```python +tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', + 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', + 'one', 'two', 'one', 'two']])) + +index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second']) +df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B']) + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AB
firstsecond
barone-0.1091740.958551
two-0.254743-0.975924
bazone-0.132039-0.119009
two0.587063-0.819037
fooone-0.7541230.430747
two-0.4265440.389822
quxone-0.382501-0.562910
two-0.5292870.826337
+
+ + + +`stack` 方法将 `columns` 变成一个新的 `index` 部分: + + +```python +df2 = df[:4] + +stacked = df2.stack() + +stacked +``` + + + + + first second + bar one A -0.109174 + B 0.958551 + two A -0.254743 + B -0.975924 + baz one A -0.132039 + B -0.119009 + two A 0.587063 + B -0.819037 + dtype: float64 + + + +可以使用 `unstack()` 将最后一级 `index` 放回 `column`: + + +```python +stacked.unstack() +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AB
firstsecond
barone-0.1091740.958551
two-0.254743-0.975924
bazone-0.132039-0.119009
two0.587063-0.819037
+
+ + + +也可以指定其他的级别: + + +```python +stacked.unstack(1) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
secondonetwo
first
barA-0.109174-0.254743
B0.958551-0.975924
bazA-0.1320390.587063
B-0.119009-0.819037
+
+ + + +## 时间序列 + +金融分析中常用到时间序列数据: + + +```python +rng = pd.date_range('3/6/2012 00:00', periods=5, freq='D') +ts = pd.Series(np.random.randn(len(rng)), rng) + +ts +``` + + + + + 2012-03-06 1.096788 + 2012-03-07 0.029678 + 2012-03-08 0.511461 + 2012-03-09 -0.332369 + 2012-03-10 1.720321 + Freq: D, dtype: float64 + + + +标准时间表示: + + +```python +ts_utc = ts.tz_localize('UTC') + +ts_utc +``` + + + + + 2012-03-06 00:00:00+00:00 1.096788 + 2012-03-07 00:00:00+00:00 0.029678 + 2012-03-08 00:00:00+00:00 0.511461 + 2012-03-09 00:00:00+00:00 -0.332369 + 2012-03-10 00:00:00+00:00 1.720321 + Freq: D, dtype: float64 + + + +改变时区表示: + + +```python +ts_utc.tz_convert('US/Eastern') +``` + + + + + 2012-03-05 19:00:00-05:00 1.096788 + 2012-03-06 19:00:00-05:00 0.029678 + 2012-03-07 19:00:00-05:00 0.511461 + 2012-03-08 19:00:00-05:00 -0.332369 + 2012-03-09 19:00:00-05:00 1.720321 + Freq: D, dtype: float64 + + + +## Categoricals + + +```python +df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']}) + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
idraw_grade
01a
12b
23b
34a
45a
56e
+
+ + + +可以将 `grade` 变成类别: + + +```python +df["grade"] = df["raw_grade"].astype("category") + +df["grade"] +``` + + + + + 0 a + 1 b + 2 b + 3 a + 4 a + 5 e + Name: grade, dtype: category + Categories (3, object): [a, b, e] + + + +将类别的表示转化为有意义的字符: + + +```python +df["grade"].cat.categories = ["very good", "good", "very bad"] + +df["grade"] +``` + + + + + 0 very good + 1 good + 2 good + 3 very good + 4 very good + 5 very bad + Name: grade, dtype: category + Categories (3, object): [very good, good, very bad] + + + +添加缺失的类别: + + +```python +df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"]) +df["grade"] +``` + + + + + 0 very good + 1 good + 2 good + 3 very good + 4 very good + 5 very bad + Name: grade, dtype: category + Categories (5, object): [very bad, bad, medium, good, very good] + + + +使用 `grade` 分组: + + +```python +df.groupby("grade").size() +``` + + + + + grade + very bad 1 + bad 0 + medium 0 + good 2 + very good 3 + dtype: int64 + + + +## 绘图 + +使用 `ggplot` 风格: + + +```python +plt.style.use('ggplot') +``` + +`Series` 绘图: + + +```python +ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000)) + +p = ts.cumsum().plot() +``` + + +![png](output_181_0.png) + + +`DataFrame` 按照 `columns` 绘图: + + +```python +df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, + columns=['A', 'B', 'C', 'D']) + +df.cumsum().plot() +p = plt.legend(loc="best") +``` + + +![png](output_183_0.png) + + +## 文件读写 + +### csv + +写入文件: + + +```python +df.to_csv('foo.csv') +``` + +从文件中读取: + + +```python +pd.read_csv('foo.csv').head() +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Unnamed: 0ABCD
02000-01-01-1.0115541.200283-0.310949-1.060734
12000-01-02-1.0308940.660518-0.214002-0.422014
22000-01-03-0.4886921.709209-0.6022081.115456
32000-01-04-0.4402430.8266920.321648-0.351698
42000-01-05-0.1656841.2973030.8172330.174767
+
+ + + +### hdf5 + +写入文件: + + +```python +df.to_hdf("foo.h5", "df") +``` + +读取文件: + + +```python +pd.read_hdf('foo.h5','df').head() +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
2000-01-01-1.0115541.200283-0.310949-1.060734
2000-01-02-1.0308940.660518-0.214002-0.422014
2000-01-03-0.4886921.709209-0.6022081.115456
2000-01-04-0.4402430.8266920.321648-0.351698
2000-01-05-0.1656841.2973030.8172330.174767
+
+ + + +### excel + +写入文件: + + +```python +df.to_excel('foo.xlsx', sheet_name='Sheet1') +``` + +读取文件: + + +```python +pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA']).head() +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ABCD
2000-01-01-1.0115541.200283-0.310949-1.060734
2000-01-02-1.0308940.660518-0.214002-0.422014
2000-01-03-0.4886921.709209-0.6022081.115456
2000-01-04-0.4402430.8266920.321648-0.351698
2000-01-05-0.1656841.2973030.8172330.174767
+
+ + + +清理生成的临时文件: + + +```python +import glob +import os + +for f in glob.glob("foo*"): + os.remove(f) +``` diff --git a/docs/12-pandas/12.02-series-in-pandas.md b/docs/12-pandas/12.02-series-in-pandas.md new file mode 100644 index 00000000..2b11015d --- /dev/null +++ b/docs/12-pandas/12.02-series-in-pandas.md @@ -0,0 +1,392 @@ + +# 一维数据结构:Series + + +```python +import numpy as np +import pandas as pd +``` + +`Series` 是一维带标记的数组结构,可以存储任意类型的数据(整数,浮点数,字符串,`Python` 对象等等)。 + +作为一维结构,它的索引叫做 `index`,基本调用方法为 + + s = pd.Series(data, index=index) + +其中,`data` 可以是以下结构: + +- 字典 +- `ndarray` +- 标量,例如 `5` + +`index` 是一维坐标轴的索引列表。 + +## 从 ndarray 构建 + +如果 `data` 是个 `ndarray`,那么 `index` 的长度必须跟 `data` 一致: + + +```python +s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"]) + +s +``` + + + + + a -0.032806 + b 0.050207 + c -1.909697 + d -1.127865 + e -0.073793 + dtype: float64 + + + +查看 `index`: + + +```python +s.index +``` + + + + + Index([u'a', u'b', u'c', u'd', u'e'], dtype='object') + + + +如果 `index` 为空,那么 `index` 会使用 `[0, ..., len(data) - 1]`: + + +```python +pd.Series(np.random.randn(5)) +``` + + + + + 0 -0.376233 + 1 -0.474349 + 2 1.660590 + 3 0.461434 + 4 0.190965 + dtype: float64 + + + +## 从字典中构造 + +如果 `data` 是个 `dict`,如果不给定 `index`,那么 `index` 将使用 `dict` 的 `key` 排序之后的结果: + + +```python +d = {'a' : 0., 'b' : 1., 'c' : 2.} + +pd.Series(d) +``` + + + + + a 0 + b 1 + c 2 + dtype: float64 + + + +如果给定了 `index`,那么将会按照 `index` 给定的值作为 `key` 从字典中读取相应的 `value`,如果 `key` 不存在,对应的值为 `NaN`(not a number, `Pandas` 中的缺失默认值): + + +```python +pd.Series(d, index=['b', 'd', 'a']) +``` + + + + + b 1 + d NaN + a 0 + dtype: float64 + + + +## 从标量值构造 + +如果 `data` 是标量,那么 `index` 值必须被指定,得到一个值为 `data` 与 `index` 等长的 `Series`: + + +```python +pd.Series(5., index=['a', 'b', 'c', 'd', 'e']) +``` + + + + + a 5 + b 5 + c 5 + d 5 + e 5 + dtype: float64 + + + +## 像 ndarray 一样使用 Series + + +```python +s +``` + + + + + a -0.032806 + b 0.050207 + c -1.909697 + d -1.127865 + e -0.073793 + dtype: float64 
+ + + +支持数字索引操作: + + +```python +s[0] +``` + + + + + -0.032806330572971713 + + + +切片: + + +```python +s[:3] +``` + + + + + a -0.032806 + b 0.050207 + c -1.909697 + dtype: float64 + + + +`mask` 索引: + + +```python +s[s > s.median()] +``` + + + + + a -0.032806 + b 0.050207 + dtype: float64 + + + +花式索引: + + +```python +s[[4, 3, 1]] +``` + + + + + e -0.073793 + d -1.127865 + b 0.050207 + dtype: float64 + + + +支持 `numpy` 函数: + + +```python +np.exp(s) +``` + + + + + a 0.967726 + b 1.051488 + c 0.148125 + d 0.323724 + e 0.928864 + dtype: float64 + + + +## 像字典一样使用 Series + +也可以像字典一样使用 `Series`: + + +```python +s["a"] +``` + + + + + -0.032806330572971713 + + + +修改数值: + + +```python +s["e"] = 12. + +s +``` + + + + + a -0.032806 + b 0.050207 + c -1.909697 + d -1.127865 + e 12.000000 + dtype: float64 + + + +查询 `key`: + + +```python +"e" in s +``` + + + + + True + + + + +```python +"f" in s +``` + + + + + False + + + +使用 `key` 索引时,如果不确定 `key` 在不在里面,可以用 `get` 方法,如果不存在返回 `None` 或者指定的默认值: + + +```python +s.get("f", np.nan) +``` + + + + + nan + + + +## 向量化操作 + +简单的向量操作与 `ndarray` 的表现一致: + + +```python +s + s +``` + + + + + a -0.065613 + b 0.100413 + c -3.819395 + d -2.255729 + e 24.000000 + dtype: float64 + + + + +```python +s * 2 +``` + + + + + a -0.065613 + b 0.100413 + c -3.819395 + d -2.255729 + e 24.000000 + dtype: float64 + + + +但 `Series` 和 `ndarray` 不同的地方在于,`Series` 的操作默认是使用 `index` 的值进行对齐的,而不是相对位置: + + +```python +s[1:] + s[:-1] +``` + + + + + a NaN + b 0.100413 + c -3.819395 + d -2.255729 + e NaN + dtype: float64 + + + +对于上面两个不能完全对齐的 `Series`,结果的 `index` 是两者 `index` 的并集,同时不能对齐的部分当作缺失值处理。 + +## Name 属性 + +可以在定义时指定 `name` 属性: + + +```python +s = pd.Series(np.random.randn(5), name='something') +s.name +``` + + + + + 'something' + + diff --git a/docs/12-pandas/12.03-dataframe-in-pandas.md b/docs/12-pandas/12.03-dataframe-in-pandas.md new file mode 100644 index 00000000..8fe931ff --- /dev/null +++ b/docs/12-pandas/12.03-dataframe-in-pandas.md @@ -0,0 +1,1007 @@ + +# 
二维数据结构:DataFrame + + +```python +import numpy as np +import pandas as pd +``` + +`DataFrame` 是 `pandas` 中的二维数据结构,可以看成一个 `Excel` 中的工作表,或者一个 `SQL` 表,或者一个存储 `Series` 对象的字典。 + +`DataFrame(data, index, columns)` 中的 `data` 可以接受很多数据类型: + +- 一个存储一维数组,字典,列表或者 `Series` 的字典 +- 2-D 数组 +- 结构或者记录数组 +- 一个 `Series` +- 另一个 `DataFrame` + +`index` 用于指定行的 `label`,`columns` 用于指定列的 `label`,如果参数不传入,那么会按照传入的内容进行设定。 + +## 从 Series 字典中构造 + +可以使用值为 `Series` 的字典进行构造: + + +```python +d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']), + 'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} +``` + +如果没有传入 `columns` 的值,那么 `columns` 的值默认为字典 `key`,`index` 默认为所有 `value` 中 `index` 的并集。 + + +```python +df = pd.DataFrame(d) + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
onetwo
a11
b22
c33
dNaN4
+
+ + + +如果指定了 `index` 值,`index` 为指定的 `index` 值: + + +```python +pd.DataFrame(d, index=['d', 'b', 'a']) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
onetwo
dNaN4
b22
a11
+
+ + + +如果指定了 `columns` 值,会去字典中寻找,找不到的值为 `NaN`: + + +```python +pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three']) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
twothree
d4NaN
b2NaN
a1NaN
+
+ + + +查看 `index` 和 `columns`: + + +```python +df.index +``` + + + + + Index([u'a', u'b', u'c', u'd'], dtype='object') + + + + +```python +df.columns +``` + + + + + Index([u'one', u'two'], dtype='object') + + + +## 从 ndarray 或者 list 字典中构造 + +如果字典的值是 `ndarray` 或者 `list`,那么它们的长度要严格保持一致: + + +```python +d = {'one' : [1., 2., 3., 4.], + 'two' : [4., 3., 2., 1.]} +``` + +`index` 默认为 `range(n)`,其中 `n` 为数组长度: + + +```python +pd.DataFrame(d) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
onetwo
014
123
232
341
+
+ + + +如果传入 `index` 参数,那么它必须与数组等长: + + +```python +pd.DataFrame(d, index=['a', 'b', 'c', 'd']) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
onetwo
a14
b23
c32
d41
+
+ + + +## 从结构数组中构造 + +`numpy` 支持结构数组的构造: + + +```python +data = np.zeros((2,), dtype=[('A', 'i4'),('B', 'f4'),('C', 'a10')]) +data[:] = [(1,2.,'Hello'), (2,3.,"World")] + +data +``` + + + + + array([(1, 2.0, 'Hello'), (2, 3.0, 'World')], + dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')]) + + + + +```python +pd.DataFrame(data) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + +
ABC
012Hello
123World
+ + + + + +```python +pd.DataFrame(data, index=['first', 'second']) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + +
ABC
first12Hello
second23World
+
+ + + + +```python +pd.DataFrame(data, columns=['C', 'A', 'B']) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + +
CAB
0Hello12
1World23
+
+ + + +## 从字典列表中构造 + +字典中同一个键的值会被合并到同一列: + + +```python +data2 = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}] + +pd.DataFrame(data2) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + +
abc
012NaN
151020
+
+ + + + +```python +pd.DataFrame(data2, index=['first', 'second']) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + +
abc
first12NaN
second51020
+
+ + + + +```python +pd.DataFrame(data2, columns=['a', 'b']) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + +
ab
012
1510
+
+ + + +## 从 Series 中构造 + +相当于将 Series 二维化。 + +## 其他构造方法 + +`DataFrame.from_dict` 从现有的一个字典中构造,`DataFrame.from_records` 从现有的一个记录数组中构造: + + +```python +pd.DataFrame.from_records(data, index='C') +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
AB
C
Hello12
World23
+
+ + + +`DataFrame.from_items` 从字典的 `item` 对构造: + + +```python +pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])]) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
AB
014
125
236
+
+ + + +## 列操作 + +`DataFrame` 可以类似于字典一样对列进行操作: + + +```python +df["one"] +``` + + + + + a 1 + b 2 + c 3 + d NaN + Name: one, dtype: float64 + + + +添加新列: + + +```python +df['three'] = df['one'] * df['two'] + +df['flag'] = df['one'] > 2 + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
onetwothreeflag
a111False
b224False
c339True
dNaN4NaNFalse
+
+ + + +可以像字典一样删除: + + +```python +del df["two"] + +three = df.pop("three") + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
oneflag
a1False
b2False
c3True
dNaNFalse
+
+ + + +给一列赋单一值: + + +```python +df['foo'] = 'bar' + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
oneflagfoo
a1Falsebar
b2Falsebar
c3Truebar
dNaNFalsebar
+
+ + + +如果 `index` 不一致,那么会只保留公共的部分: + + +```python +df['one_trunc'] = df['one'][:2] + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
oneflagfooone_trunc
a1Falsebar1
b2Falsebar2
c3TruebarNaN
dNaNFalsebarNaN
+
+ + + +也可以直接插入一维数组,但是数组的长度必须与 `index` 一致。 + +默认新列插入位置在最后,也可以指定位置插入: + + +```python +df.insert(1, 'bar', df['one']) + +df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
onebarflagfooone_trunc
a11Falsebar1
b22Falsebar2
c33TruebarNaN
dNaNNaNFalsebarNaN
+
+ + + +添加一个 `test` 新列: + + +```python +df.assign(test=df["one"] + df["bar"]) +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
onebarflagfooone_trunctest
a11Falsebar12
b22Falsebar24
c33TruebarNaN6
dNaNNaNFalsebarNaNNaN
+
+ + + +## 索引和选择 + +基本操作: + +| Operation | Syntax | Result | +| ---- | ----- | ---- | +| Select column | df[col] | Series | +| Select row by label | df.loc[label] | Series | +| Select row by integer location | df.iloc[loc] | Series | +| Slice rows | df[5:10] | DataFrame | +| Select rows by boolean vector | df[bool_vec] | DataFrame | diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..fd454526 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,153 @@ +- [01. **Python 工具**](01-python-tools) + - [01.01 Python 简介](01-python-tools/01.01-python-overview.md) + - [01.02 Ipython 解释器](01-python-tools/01.02-ipython-interpreter.md) + - [01.03 Ipython notebook](01-python-tools/01.03-ipython-notebook.md) + - [01.04 使用 Anaconda](01-python-tools/01.04-use-anaconda.md) +- [02. **Python 基础**](02-python-essentials) + - [02.01 Python 入门演示](02-python-essentials/02.01-a-tour-of-python.md) + - [02.02 Python 数据类型](02-python-essentials/02.02-python-data-types.md) + - [02.03 数字](02-python-essentials/02.03-numbers.md) + - [02.04 字符串](02-python-essentials/02.04-strings.md) + - [02.05 索引和分片](02-python-essentials/02.05-indexing-and-slicing.md) + - [02.06 列表](02-python-essentials/02.06-lists.md) + - [02.07 可变和不可变类型](02-python-essentials/02.07-mutable-and-immutable-data-types.md) + - [02.08 元组](02-python-essentials/02.08-tuples.md) + - [02.09 列表与元组的速度比较](02-python-essentials/02.09-speed-comparison-between-list-&-tuple.md) + - [02.10 字典](02-python-essentials/02.10-dictionaries.md) + - [02.11 集合](02-python-essentials/02.11-sets.md) + - [02.12 不可变集合](02-python-essentials/02.12-frozen-sets.md) + - [02.13 Python 赋值机制](02-python-essentials/02.13-how-python-assignment-works.md) + - [02.14 判断语句](02-python-essentials/02.14-if-statement.md) + - [02.15 循环](02-python-essentials/02.15-loops.md) + - [02.16 列表推导式](02-python-essentials/02.16-list-comprehension.md) + - [02.17 函数](02-python-essentials/02.17-functions.md) + - [02.18 模块和包](02-python-essentials/02.18-modules-and-packages.md) + 
- [02.19 异常](02-python-essentials/02.19-exceptions.md) + - [02.20 警告](02-python-essentials/02.20-warnings.md) + - [02.21 文件读写](02-python-essentials/02.21-file-IO.md) +- [03. **Numpy**](03-numpy) + - [03.01 Numpy 简介](03-numpy/03.01-numpy-overview.md) + - [03.02 Matplotlib 基础](03-numpy/03.02-matplotlib-basics.md) + - [03.03 Numpy 数组及其索引](03-numpy/03.03-numpy-arrays.md) + - [03.04 数组类型](03-numpy/03.04-array-types.md) + - [03.05 数组方法](03-numpy/03.05-array-calculation-method.md) + - [03.06 数组排序](03-numpy/03.06-sorting-numpy-arrays.md) + - [03.07 数组形状](03-numpy/03.07-array-shapes.md) + - [03.08 对角线](03-numpy/03.08-diagonals.md) + - [03.09 数组与字符串的转换](03-numpy/03.09-data-to-&-from-string.md) + - [03.10 数组属性方法总结](03-numpy/03.10-array-attribute-&-method-overview-.md) + - [03.11 生成数组的函数](03-numpy/03.11-array-creation-functions.md) + - [03.12 矩阵](03-numpy/03.12-matrix-object.md) + - [03.13 一般函数](03-numpy/03.13-general-functions.md) + - [03.14 向量化函数](03-numpy/03.14-vectorizing-functions.md) + - [03.15 二元运算](03-numpy/03.15-binary-operators.md) + - [03.16 ufunc 对象](03-numpy/03.16-universal-functions.md) + - [03.17 choose 函数实现条件筛选](03-numpy/03.17-choose.md) + - [03.18 数组广播机制](03-numpy/03.18-array-broadcasting.md) + - [03.19 数组读写](03-numpy/03.19-reading-and-writing-arrays.md) + - [03.20 结构化数组](03-numpy/03.20-structured-arrays.md) + - [03.21 记录数组](03-numpy/03.21-record-arrays.md) + - [03.22 内存映射](03-numpy/03.22-memory-maps.md) + - [03.23 从 Matlab 到 Numpy](03-numpy/03.23-from-matlab-to-numpy.md) +- [04. 
**Scipy**](04-scipy) + - [04.01 SCIentific PYthon 简介](04-scipy/04.01-scienticfic-python-overview.md) + - [04.02 插值](04-scipy/04.02-interpolation-with-scipy.md) + - [04.03 概率统计方法](04-scipy/04.03-statistics-with-scipy.md) + - [04.04 曲线拟合](04-scipy/04.04-curve-fitting.md) + - [04.05 最小化函数](04-scipy/04.05-minimization-in-python.md) + - [04.06 积分](04-scipy/04.06-integration-in-python.md) + - [04.07 解微分方程](04-scipy/04.07-ODEs.md) + - [04.08 稀疏矩阵](04-scipy/04.08-sparse-matrix.md) + - [04.09 线性代数](04-scipy/04.09-linear-algbra.md) + - [04.10 稀疏矩阵的线性代数](04-scipy/04.10-sparse-linear-algebra.md) +- [05. **Python 进阶**](05-advanced-python) + - [05.01 sys 模块简介](05-advanced-python/05.01-overview-of-the-sys-module.md) + - [05.02 与操作系统进行交互:os 模块](05-advanced-python/05.02-interacting-with-the-OS---os.md) + - [05.03 CSV 文件和 csv 模块](05-advanced-python/05.03-comma-separated-values.md) + - [05.04 正则表达式和 re 模块](05-advanced-python/05.04-regular-expression.md) + - [05.05 datetime 模块](05-advanced-python/05.05-datetime.md) + - [05.06 SQL 数据库](05-advanced-python/05.06-sql-databases.md) + - [05.07 对象关系映射](05-advanced-python/05.07-object-relational-mappers.md) + - [05.08 函数进阶:参数传递,高阶函数,lambda 匿名函数,global 变量,递归](05-advanced-python/05.08-functions.md) + - [05.09 迭代器](05-advanced-python/05.09-iterators.md) + - [05.10 生成器](05-advanced-python/05.10-generators.md) + - [05.11 with 语句和上下文管理器](05-advanced-python/05.11-context-managers-and-the-with-statement.md) + - [05.12 修饰符](05-advanced-python/05.12-decorators.md) + - [05.13 修饰符的使用](05-advanced-python/05.13-decorator-usage.md) + - [05.14 operator, functools, itertools, toolz, fn, funcy 模块](05-advanced-python/05.14-the-operator-functools-itertools-toolz-fn-funcy-module.md) + - [05.15 作用域](05-advanced-python/05.15-scope.md) + - [05.16 动态编译](05-advanced-python/05.16-dynamic-code-execution.md) +- [06. 
**Matplotlib**](06-matplotlib) + - [06.01 Pyplot 教程](06-matplotlib/06.01-pyplot-tutorial.md) + - [06.02 使用 style 来配置 pyplot 风格](06-matplotlib/06.02-customizing-plots-with-style-sheets.md) + - [06.03 处理文本(基础)](06-matplotlib/06.03-working-with-text---basic.md) + - [06.04 处理文本(数学表达式)](06-matplotlib/06.04-working-with-text---math-expression.md) + - [06.05 图像基础](06-matplotlib/06.05-image-tutorial.md) + - [06.06 注释](06-matplotlib/06.06-annotating-axes.md) + - [06.07 标签](06-matplotlib/06.07-legend.md) + - [06.08 figures, subplots, axes 和 ticks 对象](06-matplotlib/06.08-figures,-subplots,-axes-and-ticks.md) + - [06.09 不要迷信默认设置](06-matplotlib/06.09-do-not-trust-the-defaults.md) + - [06.10 各种绘图实例](06-matplotlib/06.10-different-plots.md) +- [07. **使用其他语言进行扩展**](07-interfacing-with-other-languages) + - [07.01 简介](07-interfacing-with-other-languages/07.01-introduction.md) + - [07.02 Python 扩展模块](07-interfacing-with-other-languages/07.02-python-extension-modules.md) + - [07.03 Cython:Cython 基础,将源代码转换成扩展模块](07-interfacing-with-other-languages/07.03-cython-part-1.md) + - [07.04 Cython:Cython 语法,调用其他C库](07-interfacing-with-other-languages/07.04-cython-part-2.md) + - [07.05 Cython:class 和 cdef class,使用 C++](07-interfacing-with-other-languages/07.05-cython-part-3.md) + - [07.06 Cython:Typed memoryviews](07-interfacing-with-other-languages/07.06-cython-part-4.md) + - [07.07 生成编译注释](07-interfacing-with-other-languages/07.07-profiling-with-annotations.md) + - [07.08 ctypes](07-interfacing-with-other-languages/07.08-ctypes.md) +- [08. 
**面向对象编程**](08-object-oriented-programming) + - [08.01 简介](08-object-oriented-programming/08.01-oop-introduction.md) + - [08.02 使用 OOP 对森林火灾建模](08-object-oriented-programming/08.02-using-oop-model-a-forest-fire.md) + - [08.03 什么是对象?](08-object-oriented-programming/08.03-what-is-a-object.md) + - [08.04 定义 class](08-object-oriented-programming/08.04-writing-classes.md) + - [08.05 特殊方法](08-object-oriented-programming/08.05-special-method.md) + - [08.06 属性](08-object-oriented-programming/08.06-properties.md) + - [08.07 森林火灾模拟](08-object-oriented-programming/08.07-forest-fire-simulation.md) + - [08.08 继承](08-object-oriented-programming/08.08-inheritance.md) + - [08.09 super() 函数](08-object-oriented-programming/08.09-super.md) + - [08.10 重定义森林火灾模拟](08-object-oriented-programming/08.10-refactoring-the-forest-fire-simutation.md) + - [08.11 接口](08-object-oriented-programming/08.11-interfaces.md) + - [08.12 公有,私有和特殊方法和属性](08-object-oriented-programming/08.12-public-private-special-in-python.md) + - [08.13 多重继承](08-object-oriented-programming/08.13-multiple-inheritance.md) +- [09. 
**Theano 基础**](09-theano) + - [09.01 Theano 简介及其安装](09-theano/09.01-introduction-and-installation.md) + - [09.02 Theano 基础](09-theano/09.02-theano-basics.md) + - [09.03 Theano 在 Windows 上的配置](09-theano/09.03-gpu-on-windows.md) + - [09.04 Theano 符号图结构](09-theano/09.04-graph-structures.md) + - [09.05 Theano 配置和编译模式](09-theano/09.05-configuration-settings-and-compiling-modes.md) + - [09.06 Theano 条件语句](09-theano/09.06-conditions-in-theano.md) + - [09.07 Theano 循环:scan(详解)](09-theano/09.07-loop-with-scan.md) + - [09.08 Theano 实例:线性回归](09-theano/09.08-linear-regression.md) + - [09.09 Theano 实例:Logistic 回归](09-theano/09.09-logistic-regression-.md) + - [09.10 Theano 实例:Softmax 回归](09-theano/09.10-softmax-on-mnist.md) + - [09.11 Theano 实例:人工神经网络](09-theano/09.11-net-on-mnist.md) + - [09.12 Theano 随机数流变量](09-theano/09.12-random-streams.md) + - [09.13 Theano 实例:更复杂的网络](09-theano/09.13-modern-net-on-mnist.md) + - [09.14 Theano 实例:卷积神经网络](09-theano/09.14-convolutional-net-on-mnist.md) + - [09.15 Theano tensor 模块:基础](09-theano/09.15-tensor-basics.md) + - [09.16 Theano tensor 模块:索引](09-theano/09.16-tensor-indexing.md) + - [09.17 Theano tensor 模块:操作符和逐元素操作](09-theano/09.17-tensor-operator-and-elementwise-operations.md) + - [09.18 Theano tensor 模块:nnet 子模块](09-theano/09.18-tensor-nnet-.md) + - [09.19 Theano tensor 模块:conv 子模块](09-theano/09.19-tensor-conv.md) +- [10. **有趣的第三方模块**](10-something-interesting) + - [10.01 使用 basemap 画地图](10-something-interesting/10.01-maps-using-basemap.md) + - [10.02 使用 cartopy 画地图](10-something-interesting/10.02-maps-using-cartopy.md) + - [10.03 探索 NBA 数据](10-something-interesting/10.03-nba-data.md) + - [10.04 金庸的武侠世界](10-something-interesting/10.04-louis-cha's-kungfu-world.md) +- [11. 
**有用的工具**](11-useful-tools) + - [11.01 pprint 模块:打印 Python 对象](11-useful-tools/11.01-pprint.md) + - [11.02 pickle, cPickle 模块:序列化 Python 对象](11-useful-tools/11.02-pickle-and-cPickle.md) + - [11.03 json 模块:处理 JSON 数据](11-useful-tools/11.03-json.md) + - [11.04 glob 模块:文件模式匹配](11-useful-tools/11.04-glob.md) + - [11.05 shutil 模块:高级文件操作](11-useful-tools/11.05-shutil.md) + - [11.06 gzip, zipfile, tarfile 模块:处理压缩文件](11-useful-tools/11.06-gzip,-zipfile,-tarfile.md) + - [11.07 logging 模块:记录日志](11-useful-tools/11.07-logging.md) + - [11.08 string 模块:字符串处理](11-useful-tools/11.08-string.md) + - [11.09 collections 模块:更多数据结构](11-useful-tools/11.09-collections.md) + - [11.10 requests 模块:HTTP for Humans](11-useful-tools/11.10-requests.md) +- [12. **Pandas**](12-pandas) + - [12.01 十分钟上手 Pandas](12-pandas/12.01-ten-minutes-to-pandas.md) + - [12.02 一维数据结构:Series](12-pandas/12.02-series-in-pandas.md) + - [12.03 二维数据结构:DataFrame](12-pandas/12.03-dataframe-in-pandas.md) \ No newline at end of file diff --git a/docs/_sidebar.md b/docs/_sidebar.md new file mode 100755 index 00000000..fd454526 --- /dev/null +++ b/docs/_sidebar.md @@ -0,0 +1,153 @@ +- [01. **Python 工具**](01-python-tools) + - [01.01 Python 简介](01-python-tools/01.01-python-overview.md) + - [01.02 Ipython 解释器](01-python-tools/01.02-ipython-interpreter.md) + - [01.03 Ipython notebook](01-python-tools/01.03-ipython-notebook.md) + - [01.04 使用 Anaconda](01-python-tools/01.04-use-anaconda.md) +- [02. 
**Python 基础**](02-python-essentials) + - [02.01 Python 入门演示](02-python-essentials/02.01-a-tour-of-python.md) + - [02.02 Python 数据类型](02-python-essentials/02.02-python-data-types.md) + - [02.03 数字](02-python-essentials/02.03-numbers.md) + - [02.04 字符串](02-python-essentials/02.04-strings.md) + - [02.05 索引和分片](02-python-essentials/02.05-indexing-and-slicing.md) + - [02.06 列表](02-python-essentials/02.06-lists.md) + - [02.07 可变和不可变类型](02-python-essentials/02.07-mutable-and-immutable-data-types.md) + - [02.08 元组](02-python-essentials/02.08-tuples.md) + - [02.09 列表与元组的速度比较](02-python-essentials/02.09-speed-comparison-between-list-&-tuple.md) + - [02.10 字典](02-python-essentials/02.10-dictionaries.md) + - [02.11 集合](02-python-essentials/02.11-sets.md) + - [02.12 不可变集合](02-python-essentials/02.12-frozen-sets.md) + - [02.13 Python 赋值机制](02-python-essentials/02.13-how-python-assignment-works.md) + - [02.14 判断语句](02-python-essentials/02.14-if-statement.md) + - [02.15 循环](02-python-essentials/02.15-loops.md) + - [02.16 列表推导式](02-python-essentials/02.16-list-comprehension.md) + - [02.17 函数](02-python-essentials/02.17-functions.md) + - [02.18 模块和包](02-python-essentials/02.18-modules-and-packages.md) + - [02.19 异常](02-python-essentials/02.19-exceptions.md) + - [02.20 警告](02-python-essentials/02.20-warnings.md) + - [02.21 文件读写](02-python-essentials/02.21-file-IO.md) +- [03. 
**Numpy**](03-numpy) + - [03.01 Numpy 简介](03-numpy/03.01-numpy-overview.md) + - [03.02 Matplotlib 基础](03-numpy/03.02-matplotlib-basics.md) + - [03.03 Numpy 数组及其索引](03-numpy/03.03-numpy-arrays.md) + - [03.04 数组类型](03-numpy/03.04-array-types.md) + - [03.05 数组方法](03-numpy/03.05-array-calculation-method.md) + - [03.06 数组排序](03-numpy/03.06-sorting-numpy-arrays.md) + - [03.07 数组形状](03-numpy/03.07-array-shapes.md) + - [03.08 对角线](03-numpy/03.08-diagonals.md) + - [03.09 数组与字符串的转换](03-numpy/03.09-data-to-&-from-string.md) + - [03.10 数组属性方法总结](03-numpy/03.10-array-attribute-&-method-overview-.md) + - [03.11 生成数组的函数](03-numpy/03.11-array-creation-functions.md) + - [03.12 矩阵](03-numpy/03.12-matrix-object.md) + - [03.13 一般函数](03-numpy/03.13-general-functions.md) + - [03.14 向量化函数](03-numpy/03.14-vectorizing-functions.md) + - [03.15 二元运算](03-numpy/03.15-binary-operators.md) + - [03.16 ufunc 对象](03-numpy/03.16-universal-functions.md) + - [03.17 choose 函数实现条件筛选](03-numpy/03.17-choose.md) + - [03.18 数组广播机制](03-numpy/03.18-array-broadcasting.md) + - [03.19 数组读写](03-numpy/03.19-reading-and-writing-arrays.md) + - [03.20 结构化数组](03-numpy/03.20-structured-arrays.md) + - [03.21 记录数组](03-numpy/03.21-record-arrays.md) + - [03.22 内存映射](03-numpy/03.22-memory-maps.md) + - [03.23 从 Matlab 到 Numpy](03-numpy/03.23-from-matlab-to-numpy.md) +- [04. **Scipy**](04-scipy) + - [04.01 SCIentific PYthon 简介](04-scipy/04.01-scienticfic-python-overview.md) + - [04.02 插值](04-scipy/04.02-interpolation-with-scipy.md) + - [04.03 概率统计方法](04-scipy/04.03-statistics-with-scipy.md) + - [04.04 曲线拟合](04-scipy/04.04-curve-fitting.md) + - [04.05 最小化函数](04-scipy/04.05-minimization-in-python.md) + - [04.06 积分](04-scipy/04.06-integration-in-python.md) + - [04.07 解微分方程](04-scipy/04.07-ODEs.md) + - [04.08 稀疏矩阵](04-scipy/04.08-sparse-matrix.md) + - [04.09 线性代数](04-scipy/04.09-linear-algbra.md) + - [04.10 稀疏矩阵的线性代数](04-scipy/04.10-sparse-linear-algebra.md) +- [05. 
**Python 进阶**](05-advanced-python) + - [05.01 sys 模块简介](05-advanced-python/05.01-overview-of-the-sys-module.md) + - [05.02 与操作系统进行交互:os 模块](05-advanced-python/05.02-interacting-with-the-OS---os.md) + - [05.03 CSV 文件和 csv 模块](05-advanced-python/05.03-comma-separated-values.md) + - [05.04 正则表达式和 re 模块](05-advanced-python/05.04-regular-expression.md) + - [05.05 datetime 模块](05-advanced-python/05.05-datetime.md) + - [05.06 SQL 数据库](05-advanced-python/05.06-sql-databases.md) + - [05.07 对象关系映射](05-advanced-python/05.07-object-relational-mappers.md) + - [05.08 函数进阶:参数传递,高阶函数,lambda 匿名函数,global 变量,递归](05-advanced-python/05.08-functions.md) + - [05.09 迭代器](05-advanced-python/05.09-iterators.md) + - [05.10 生成器](05-advanced-python/05.10-generators.md) + - [05.11 with 语句和上下文管理器](05-advanced-python/05.11-context-managers-and-the-with-statement.md) + - [05.12 修饰符](05-advanced-python/05.12-decorators.md) + - [05.13 修饰符的使用](05-advanced-python/05.13-decorator-usage.md) + - [05.14 operator, functools, itertools, toolz, fn, funcy 模块](05-advanced-python/05.14-the-operator-functools-itertools-toolz-fn-funcy-module.md) + - [05.15 作用域](05-advanced-python/05.15-scope.md) + - [05.16 动态编译](05-advanced-python/05.16-dynamic-code-execution.md) +- [06. **Matplotlib**](06-matplotlib) + - [06.01 Pyplot 教程](06-matplotlib/06.01-pyplot-tutorial.md) + - [06.02 使用 style 来配置 pyplot 风格](06-matplotlib/06.02-customizing-plots-with-style-sheets.md) + - [06.03 处理文本(基础)](06-matplotlib/06.03-working-with-text---basic.md) + - [06.04 处理文本(数学表达式)](06-matplotlib/06.04-working-with-text---math-expression.md) + - [06.05 图像基础](06-matplotlib/06.05-image-tutorial.md) + - [06.06 注释](06-matplotlib/06.06-annotating-axes.md) + - [06.07 标签](06-matplotlib/06.07-legend.md) + - [06.08 figures, subplots, axes 和 ticks 对象](06-matplotlib/06.08-figures,-subplots,-axes-and-ticks.md) + - [06.09 不要迷信默认设置](06-matplotlib/06.09-do-not-trust-the-defaults.md) + - [06.10 各种绘图实例](06-matplotlib/06.10-different-plots.md) +- [07. 
**使用其他语言进行扩展**](07-interfacing-with-other-languages) + - [07.01 简介](07-interfacing-with-other-languages/07.01-introduction.md) + - [07.02 Python 扩展模块](07-interfacing-with-other-languages/07.02-python-extension-modules.md) + - [07.03 Cython:Cython 基础,将源代码转换成扩展模块](07-interfacing-with-other-languages/07.03-cython-part-1.md) + - [07.04 Cython:Cython 语法,调用其他C库](07-interfacing-with-other-languages/07.04-cython-part-2.md) + - [07.05 Cython:class 和 cdef class,使用 C++](07-interfacing-with-other-languages/07.05-cython-part-3.md) + - [07.06 Cython:Typed memoryviews](07-interfacing-with-other-languages/07.06-cython-part-4.md) + - [07.07 生成编译注释](07-interfacing-with-other-languages/07.07-profiling-with-annotations.md) + - [07.08 ctypes](07-interfacing-with-other-languages/07.08-ctypes.md) +- [08. **面向对象编程**](08-object-oriented-programming) + - [08.01 简介](08-object-oriented-programming/08.01-oop-introduction.md) + - [08.02 使用 OOP 对森林火灾建模](08-object-oriented-programming/08.02-using-oop-model-a-forest-fire.md) + - [08.03 什么是对象?](08-object-oriented-programming/08.03-what-is-a-object.md) + - [08.04 定义 class](08-object-oriented-programming/08.04-writing-classes.md) + - [08.05 特殊方法](08-object-oriented-programming/08.05-special-method.md) + - [08.06 属性](08-object-oriented-programming/08.06-properties.md) + - [08.07 森林火灾模拟](08-object-oriented-programming/08.07-forest-fire-simulation.md) + - [08.08 继承](08-object-oriented-programming/08.08-inheritance.md) + - [08.09 super() 函数](08-object-oriented-programming/08.09-super.md) + - [08.10 重定义森林火灾模拟](08-object-oriented-programming/08.10-refactoring-the-forest-fire-simutation.md) + - [08.11 接口](08-object-oriented-programming/08.11-interfaces.md) + - [08.12 公有,私有和特殊方法和属性](08-object-oriented-programming/08.12-public-private-special-in-python.md) + - [08.13 多重继承](08-object-oriented-programming/08.13-multiple-inheritance.md) +- [09. 
**Theano 基础**](09-theano) + - [09.01 Theano 简介及其安装](09-theano/09.01-introduction-and-installation.md) + - [09.02 Theano 基础](09-theano/09.02-theano-basics.md) + - [09.03 Theano 在 Windows 上的配置](09-theano/09.03-gpu-on-windows.md) + - [09.04 Theano 符号图结构](09-theano/09.04-graph-structures.md) + - [09.05 Theano 配置和编译模式](09-theano/09.05-configuration-settings-and-compiling-modes.md) + - [09.06 Theano 条件语句](09-theano/09.06-conditions-in-theano.md) + - [09.07 Theano 循环:scan(详解)](09-theano/09.07-loop-with-scan.md) + - [09.08 Theano 实例:线性回归](09-theano/09.08-linear-regression.md) + - [09.09 Theano 实例:Logistic 回归](09-theano/09.09-logistic-regression-.md) + - [09.10 Theano 实例:Softmax 回归](09-theano/09.10-softmax-on-mnist.md) + - [09.11 Theano 实例:人工神经网络](09-theano/09.11-net-on-mnist.md) + - [09.12 Theano 随机数流变量](09-theano/09.12-random-streams.md) + - [09.13 Theano 实例:更复杂的网络](09-theano/09.13-modern-net-on-mnist.md) + - [09.14 Theano 实例:卷积神经网络](09-theano/09.14-convolutional-net-on-mnist.md) + - [09.15 Theano tensor 模块:基础](09-theano/09.15-tensor-basics.md) + - [09.16 Theano tensor 模块:索引](09-theano/09.16-tensor-indexing.md) + - [09.17 Theano tensor 模块:操作符和逐元素操作](09-theano/09.17-tensor-operator-and-elementwise-operations.md) + - [09.18 Theano tensor 模块:nnet 子模块](09-theano/09.18-tensor-nnet-.md) + - [09.19 Theano tensor 模块:conv 子模块](09-theano/09.19-tensor-conv.md) +- [10. **有趣的第三方模块**](10-something-interesting) + - [10.01 使用 basemap 画地图](10-something-interesting/10.01-maps-using-basemap.md) + - [10.02 使用 cartopy 画地图](10-something-interesting/10.02-maps-using-cartopy.md) + - [10.03 探索 NBA 数据](10-something-interesting/10.03-nba-data.md) + - [10.04 金庸的武侠世界](10-something-interesting/10.04-louis-cha's-kungfu-world.md) +- [11. 
**有用的工具**](11-useful-tools) + - [11.01 pprint 模块:打印 Python 对象](11-useful-tools/11.01-pprint.md) + - [11.02 pickle, cPickle 模块:序列化 Python 对象](11-useful-tools/11.02-pickle-and-cPickle.md) + - [11.03 json 模块:处理 JSON 数据](11-useful-tools/11.03-json.md) + - [11.04 glob 模块:文件模式匹配](11-useful-tools/11.04-glob.md) + - [11.05 shutil 模块:高级文件操作](11-useful-tools/11.05-shutil.md) + - [11.06 gzip, zipfile, tarfile 模块:处理压缩文件](11-useful-tools/11.06-gzip,-zipfile,-tarfile.md) + - [11.07 logging 模块:记录日志](11-useful-tools/11.07-logging.md) + - [11.08 string 模块:字符串处理](11-useful-tools/11.08-string.md) + - [11.09 collections 模块:更多数据结构](11-useful-tools/11.09-collections.md) + - [11.10 requests 模块:HTTP for Humans](11-useful-tools/11.10-requests.md) +- [12. **Pandas**](12-pandas) + - [12.01 十分钟上手 Pandas](12-pandas/12.01-ten-minutes-to-pandas.md) + - [12.02 一维数据结构:Series](12-pandas/12.02-series-in-pandas.md) + - [12.03 二维数据结构:DataFrame](12-pandas/12.03-dataframe-in-pandas.md) \ No newline at end of file diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 00000000..288cea2e --- /dev/null +++ b/docs/index.html @@ -0,0 +1,22 @@ + + + + + Document + + + + + + +
+ + + + diff --git a/generate_static_files.py b/generate_static_files.py index 99e3d33c..6ac71457 100644 --- a/generate_static_files.py +++ b/generate_static_files.py @@ -77,7 +77,7 @@ def convert_to_files(names, to_format): # In[ ]: -convert_to_files(file_names, "html") +convert_to_files(file_names, "markdown") # 产生新目录: @@ -87,6 +87,6 @@ def convert_to_files(names, to_format): with open('index.md') as f: text = f.read() - with open(os.path.join("static-files", "html", "README.md"), "w") as g: - g.write(text.replace(".ipynb", ".html")) + with open(os.path.join("static-files", "markdown", "README.md"), "w") as g: + g.write(text.replace(".ipynb", ".md"))