diff --git "a/docs/views/data/2021-03-27-\345\246\202\344\275\225\345\210\251\347\224\250pandas\345\201\232\347\256\200\345\215\225\347\232\204\346\225\260\346\215\256\345\210\206\346\236\220.md" "b/docs/views/data/2021-03-27-\345\246\202\344\275\225\345\210\251\347\224\250pandas\345\201\232\347\256\200\345\215\225\347\232\204\346\225\260\346\215\256\345\210\206\346\236\220.md" index 2940e9a0..5b1abaf8 100644 --- "a/docs/views/data/2021-03-27-\345\246\202\344\275\225\345\210\251\347\224\250pandas\345\201\232\347\256\200\345\215\225\347\232\204\346\225\260\346\215\256\345\210\206\346\236\220.md" +++ "b/docs/views/data/2021-03-27-\345\246\202\344\275\225\345\210\251\347\224\250pandas\345\201\232\347\256\200\345\215\225\347\232\204\346\225\260\346\215\256\345\210\206\346\236\220.md" @@ -1,6 +1,7 @@ --- layout: post title: 如何用pandas做简单的数据分析 + date: 2021-03-27 author: LZY categories: diff --git "a/docs/views/data/2021-11-08-week1\345\255\246\344\271\240\345\206\205\345\256\271.md" "b/docs/views/data/2021-11-08-week1\345\255\246\344\271\240\345\206\205\345\256\271.md" new file mode 100644 index 00000000..43a20225 --- /dev/null +++ "b/docs/views/data/2021-11-08-week1\345\255\246\344\271\240\345\206\205\345\256\271.md" @@ -0,0 +1,32 @@ +--- +layout: post +title: week1学习内容 +date: 2021-11-08 +author: 饶翰宇 +categories: + - 数据分析部 +tags: + - 数据分析 + - Python +--- + +## python: + +1. pandas + +算法数据结构: + +1. 决策树 +1. 随机森林 +2. 栈和队列 + +数学: + +1. 二维随机变量的分布 + +其他: + +1. 学习了基本的markdown语法 +2. 利用Typora书写markdown +3. 
安装好了pandoc,配置好了上传博客的基础工具 + diff --git "a/docs/views/data/2021-11-25-MySQL\345\222\214\346\225\260\346\215\256\345\217\257\350\247\206\345\214\226.md" "b/docs/views/data/2021-11-25-MySQL\345\222\214\346\225\260\346\215\256\345\217\257\350\247\206\345\214\226.md" new file mode 100644 index 00000000..09920885 --- /dev/null +++ "b/docs/views/data/2021-11-25-MySQL\345\222\214\346\225\260\346\215\256\345\217\257\350\247\206\345\214\226.md" @@ -0,0 +1,212 @@ +--- +layout: post +title: MySQL和数据可视化 +date: 2021-11-25 +author: 饶翰宇 +categories: + - 数据分析部 +tags: + - MySQL + - Python + - 函数画图 +--- + +## week3 + +### MySQL进阶1 + +1. 查询🐱‍🐉 + + - 排序查询 + + ```mysql + select * from person order by age desc,id asc; + ``` + +2. 函数🐱‍🚀 + + - 单行函数 + + 1. 字符函数 + + - concat + + ```mysql + select concat('这瓜','保熟','吗'); + ``` + + - length(返回字节长度) + + ```mysql + select length('张三a123'); + ``` + + - substr/substring + + ```mysql + SELECT SUBSTR('今天希望你开心',5,3); + ``` + + - upper & lower + + ```mysql + select upper('abAb1'); + select lower('abAb1'); + ``` + + - instr + + ```mysql + select instr('泊松分布','分布'); + ``` + + - trim + + ```mysql + select trim(' abcd '); + select trim('ab' from 'ab abcc b'); + ``` + + - lpad & rpad + + ```mysql + SELECT LPAD('哥谭市',8,'*'); + SELECT RPAD('哥谭市',8,'*'); + ``` + + - replace + + ```mysql + SELECT REPLACE('想登上高山欲穷千里目','想','不想'); + ``` + + + + 2. 数学函数 + + - round + + ```mysql + select round(1.22,1); + ``` + + - ceil & floor(向上、向下取整) + + ```mysql + select ceil(1.9); + select floor(1.9); + ``` + + - truncate(保留几位小数) + + ```mysql + select truncate(1.231313,3); + ``` + + - mod + + ```mysql + select mod(10,3); + ``` + + 3. 日期函数 + + - now + - curdate + - curtime + + 4. 其他函数 + + 5. 流程控制函数 + + - 分组函数(统计使用) + + + +### 绘图 + +#### 绘制正态分布:jack_o_lantern: + +1. 
利用随机数绘画:baby_chick: + + - 首先利用numpy生成随机标准正态分布数组 + + ```python + import numpy as np + np.random.seed(0) + data = np.random.standard_normal(100000000) + data + ``` + + ```python + array([ 1.76405235, 0.40015721, 0.97873798, ..., 0.32191089, + 0.25199669, -1.22612391]) + ``` + + - 然后使用matplotlib绘出图像 + + ```python + import matplotlib.pyplot as plt + %matplotlib inline + plt.hist(data,1000) + ``` + + ![屏幕截图 2021-11-26 121445.png](https://i.loli.net/2021/11/26/2yPKNiYHb6kuaQR.png) + + + +2. 利用sympy画图:label: + + - ```python + from sympy import * + from sympy.stats import Normal,density + ``` + + - ```python + y = symbols('y') + x = symbols('x') + y = Normal(y,0,1) + plot(density(y)(x)) + ``` + + - ```python + density(y)(x) + ``` + + - ![屏幕截图 2021-11-26 135103.png](https://i.loli.net/2021/11/26/tYrEBCmaT67iFWX.png) + + - + + ![屏幕截图 2021-11-26 133204.png](https://i.loli.net/2021/11/28/EdlYUr84F1ceCXi.png) + + + +绘制其他函数 + +1. sympy + + - ```python + plot(x,pow(x,2)) + ``` + + - ![屏幕截图 2021-11-26 141341.png](https://i.loli.net/2021/11/26/LMZstOnfU2JHKF9.png) + +2. 
matplotlib + + - ```python + x = np.arange(1,10,0.01) + y = np.log10(x) + u = np.arange(1,10,0.01) + w = np.exp(u) + ``` + + - ```python + plt.style.use('ggplot') + fig,ax = plt.subplots(1,2,figsize=(8,4)) + ax[0].plot(x,y,label='log10',color='r') + ax[0].legend(loc='best') + ax[1].plot(u,w,label='ex',color='b') + ax[1].legend(loc='best') + ``` + + - ![屏幕截图 2021-11-26 143624.png](https://i.loli.net/2021/11/26/wZLR4r2SmQ1G8XW.png) diff --git "a/docs/views/data/2021-12-3-\346\234\264\347\264\240\350\264\235\345\217\266\346\226\257\347\256\227\346\263\225\345\256\236\347\216\260\346\226\207\346\234\254\345\210\206\347\261\273.md" "b/docs/views/data/2021-12-3-\346\234\264\347\264\240\350\264\235\345\217\266\346\226\257\347\256\227\346\263\225\345\256\236\347\216\260\346\226\207\346\234\254\345\210\206\347\261\273.md" new file mode 100644 index 00000000..098c6c2f --- /dev/null +++ "b/docs/views/data/2021-12-3-\346\234\264\347\264\240\350\264\235\345\217\266\346\226\257\347\256\227\346\263\225\345\256\236\347\216\260\346\226\207\346\234\254\345\210\206\347\261\273.md" @@ -0,0 +1,119 @@ +--- +layout: post +title: 朴素贝叶斯算法实现文本分类 +date: 2021-12-3 +author: 饶翰宇 +categories: + - 数据分析部 +tags: + - Python + - 文本分类 +--- + +## 文本分类 + +现实中的文本复杂多样,文本分类和文本情感分析是我们开展机器学习的重要组成部分。 + +以下将用一个案例来实现对文本的分类。 + +- 首先导入原始的数据 + + 这里我们使用一个对餐厅评价的数据集 + + ```python + import pandas as pd + data = pd.read_csv('./restaurant.csv',encoding='gb18030') + data + ``` + + ![A5SF1_K_AV1CUQ9__9_Z8M7.png](https://s2.loli.net/2021/12/04/1cTFRozlOeU2W7I.png) + +- 紧接着对每条数据附上标签,将star高于3的划分为1,反之则为0 + + ```python + import numpy as np + star = np.array(data.star) + star[star <= 3] = 0 + star[star > 3] = 1 + data['label'] = star + data + ``` + + ![98ST`_R_XMSE__CTL_YN_GV.png](https://s2.loli.net/2021/12/04/RlbGJV6ZQmYShsa.png) + +- 然后我们对每条评论进行切词并且新增加一列“words” + + ```python + import jieba + data['words'] = data['comment'].apply(lambda x:' '.join(jieba.lcut(x,cut_all=True))) + data + ``` + + 
![8BI6_7E8FI7864I_CVVY1_T.png](https://s2.loli.net/2021/12/04/Z8QMfj7LEolWqrX.png) + +- 对数据集进行训练集和测试集的划分 + + ```python + from sklearn.model_selection import train_test_split + x_train,x_test,y_train,y_test = train_test_split(data.words,data.label,test_size=0.2,random_state=42) + ``` + +- 导入文本特征提取方法 + + ```python + from sklearn.feature_extraction.text import CountVectorizer + ``` + +- 计算次数 + + ```python + counter = CountVectorizer() + x_train = counter.fit_transform(x_train) + x_test = counter.transform(x_test) + ``` + +- 画出图表 + + ```python + amount = x_train.toarray() + name = counter.get_feature_names() + result = pd.DataFrame(data=amount,columns=name) + result + ``` + + ![屏幕截图 2021-12-05 164011.png](https://s2.loli.net/2021/12/05/ABOXHVwYGRyFZhM.png) + +- 搭建模型 + + ```python + from sklearn.naive_bayes import MultinomialNB + estimator = MultinomialNB() + estimator.fit(x_train,y_train) + ``` + + ```python + y_predict = estimator.predict(x_test) + ``` + + ![屏幕截图 2021-12-05 164841.png](https://s2.loli.net/2021/12/05/gsTkdQZNf3vCY6o.png) + +- 计算准确率 + + ```python + estimator.score(x_test,y_test) + ``` + + $$ + 0.8475 + $$ + + + +- 查看测试集和预测目标值的正确率 + + ```python + np.array(y_test == y_predict) + ``` + + ![屏幕截图 2021-12-05 165057.png](https://s2.loli.net/2021/12/05/DWs2x9eSQcEFirm.png) + diff --git "a/docs/views/data/2022-4-1-seaborn\347\273\230\345\233\276.md" "b/docs/views/data/2022-4-1-seaborn\347\273\230\345\233\276.md" new file mode 100644 index 00000000..91a51def --- /dev/null +++ "b/docs/views/data/2022-4-1-seaborn\347\273\230\345\233\276.md" @@ -0,0 +1,558 @@ +--- +layout: post +title: seaborn绘图 +date: 2022-4-1 +author: 饶翰宇 +categories: + - 数据分析部 +tags: + - Python + - seaborn +--- + +```python +import seaborn as sns +``` + + +```python +from sklearn.datasets import load_iris,load_boston +import pandas as pd +``` + + +```python +category_dataset = load_iris() +category_data = pd.DataFrame(category_dataset.data,columns=category_dataset.feature_names) 
+category_data['species'] = category_dataset.target +category_data +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
sepal length (cm)sepal width (cm)petal length (cm)petal width (cm)species
05.13.51.40.20
14.93.01.40.20
24.73.21.30.20
34.63.11.50.20
45.03.61.40.20
..................
1456.73.05.22.32
1466.32.55.01.92
1476.53.05.22.02
1486.23.45.42.32
1495.93.05.11.82
+

150 rows × 5 columns

+ + +```python +regression_dataset = load_boston() +regression_data = pd.DataFrame(regression_dataset.data,columns=regression_dataset.feature_names) +regression_data['target'] = regression_dataset.target +regression_data +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOBLSTATtarget
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.9824.0
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.1421.6
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.0334.7
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.9433.4
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.3336.2
.............................................
5010.062630.011.930.00.5736.59369.12.47861.0273.021.0391.999.6722.4
5020.045270.011.930.00.5736.12076.72.28751.0273.021.0396.909.0820.6
5030.060760.011.930.00.5736.97691.02.16751.0273.021.0396.905.6423.9
5040.109590.011.930.00.5736.79489.32.38891.0273.021.0393.456.4822.0
5050.047410.011.930.00.5736.03080.82.50501.0273.021.0396.907.8811.9
+

506 rows × 14 columns

+# 数值 + +## distribution+plot + + +```python +sns.displot(category_data['sepal length (cm)'],rug=True,palette='pastel',kind='hist') +``` + +![1](https://i.ibb.co/GRNpmz1/output-13-1.png) + + +## 核密度估计图 + + +```python +sns.kdeplot(x='sepal width (cm)',y='petal width (cm)',data=category_data,shade=True,palette=sns.color_palette('husl',2)) +``` + + +![png](https://i.ibb.co/Fhs8jnn/output-15-2.png) + + + +## 双变量分布图 + + +```python +sns.jointplot(data=category_data) +``` + + +![png](https://i.ibb.co/q15Fkn7/output-17-1.png) + + + +## 多变量分布图 + + +```python +sns.pairplot(data=category_data.iloc[:,:-1]) +``` + + +![png](https://i.ibb.co/82mcqQr/output-19-1.png) + + + +## 变量关系图 + + +```python +sns.relplot(x='sepal length (cm)',y='sepal width (cm)',data=category_data,hue='species',style='species',palette='pastel') +``` + + +![png](https://i.ibb.co/YL9gXL9/output-21-1.png) + + + +## 散点图 + + +```python +sns.scatterplot(data=category_data.iloc[:,:-1]) +``` + + +![png](https://i.ibb.co/5Mx2WMh/output-23-1.png) + + + +## 线图 + + +```python +sns.lineplot(x='sepal length (cm)',y='sepal width (cm)',data=category_data,estimator=None,hue='species', + style='species',markers=True,units='sepal width (cm)') +``` + + +![png](https://i.ibb.co/4YRf9tQ/output-25-1.png) + + + +## 回归图 + + +```python +sns.regplot(x='sepal length (cm)',y='sepal width (cm)',color='pink',marker='*',order=2,data=category_data) +``` + + +![png](https://i.ibb.co/5sqc7GH/output-27-1.png) + + +```python +sns.regplot(x=category_data['sepal length (cm)'],y=category_data['sepal width (cm)'],data=category_data) +``` + + +![png](https://i.ibb.co/zFbhZcj/output-28-1.png) + + + +## 残差分布图 + + +```python +sns.residplot(x='sepal length (cm)',y='sepal width (cm)',data=category_data,color='red') +``` + + +![png](https://i.ibb.co/p26fy27/output-30-1.png) + + +## 热力图 + + +```python +sns.heatmap(category_data.iloc[:,:-1].corr(),cbar=False,fmt='.2f',annot=True,linewidths=0.5,linecolor='gray',square=True,cmap='YlGnBu_r') +``` + + 
+![png](https://i.ibb.co/HPtLrwp/output-34-1.png) + +```python +sns.clustermap(category_data.iloc[:,:-1].corr(),annot=True) +``` + + +![png](https://i.ibb.co/3fJsQbj/output-35-1.png) + +# 分类 + +## 散点图 + + +```python +sns.stripplot(x='species',y='sepal width (cm)',data=category_data) +``` + + +![png](https://i.ibb.co/QFXwbJP/output-41-1.png) + + +```python +sns.swarmplot(x='species',y='sepal width (cm)',data=category_data) +``` + + +![png](https://i.ibb.co/gMDQFbg/output-42-1.png) + + +## 箱线图 + + +```python +sns.boxplot(x='species',y='sepal width (cm)',data=category_data) +``` + + +![png](https://i.ibb.co/cT6dLMw/output-45-1.png) + + +```python +sns.boxenplot(x='species',y='sepal width (cm)',data=category_data) +``` + + +![png](https://i.ibb.co/dcJqL3k/output-46-1.png) + + + +## 小提琴图 + + +```python +sns.violinplot(x='species',y='sepal width (cm)',data=category_data,kind='violin',split=True,bw=0.5,cut=.5,inner='stick',palette='pastel') +``` + + +![png](https://i.ibb.co/64mHH74/output-51-1.png) + + + +## 统计图 + + +```python +sns.pointplot(x='species',y='sepal width (cm)',data=category_data,ci=50) #均值 +``` + + +![png](https://i.ibb.co/MG9d1tz/output-54-1.png) + + +```python +sns.barplot(x='species',y='sepal width (cm)',data=category_data,ci=50) +``` + + +![png](https://i.ibb.co/tZtYrgg/output-55-1.png) + + diff --git "a/docs/views/data/2022-4-7-\346\225\260\345\255\246\346\250\241\345\236\213.md" "b/docs/views/data/2022-4-7-\346\225\260\345\255\246\346\250\241\345\236\213.md" new file mode 100644 index 00000000..b123784d --- /dev/null +++ "b/docs/views/data/2022-4-7-\346\225\260\345\255\246\346\250\241\345\236\213.md" @@ -0,0 +1,192 @@ +--- +layout: post +title: 数学建模常用模型 +date: 2022-4-7 +author: 饶翰宇 +categories: + - 数据分析部 +tags: + - Python + - 数学建模 +--- + +```python +import numpy as np +from scipy import optimize +``` + +# 线性规划 + +$$ +2x_1+3x_2+x_3 +$$ + +$$ +\begin{cases} +x_1,x_2,x_3>0 \\ +x_1+x_2+x_3<8 \\ +x_1+4x_2+3x_3>15 \\ +x_1+3x_2+4x_3=8 +\end{cases} +$$ + + 
+```python +c = [2,3,1] +A_ub = [[1,1,1],[-1,-4,-3]] +b_ub = [8,-15] +A_eq = [[1,3,4]] +b_eq = [8] +bounds = [[0,None],[0,None],[0,None]] +``` + + +```python +optimize.linprog(c,A_ub,b_ub,A_eq,b_eq,bounds=bounds) +``` + + + con: array([-2.82744494e-10]) + fun: 7.762393895046949 + message: 'The algorithm terminated successfully and determined that the problem is infeasible.' + nit: 4 + slack: array([ 4.57100406, -6.09559799]) + status: 2 + success: False + x: array([1.41971392, 1.45684202, 0.55244001]) + +# 指派问题 + + +```python +assign = np.random.randint(10,size=(3,4)) +assign +``` + + + array([[2, 4, 3, 8], + [0, 3, 2, 0], + [9, 5, 0, 4]]) + + +```python +opt = optimize.linear_sum_assignment(assign) +opt +``` + + + (array([0, 1, 2]), array([0, 3, 2], dtype=int64)) + + +```python +sum(assign[opt]) +``` + + + 2 + +# 非线性规划 + + +```python +optimize.minimize(lambda x:x**2+2*x+1,x0=0) +``` + + + fun: 0.0 + hess_inv: array([[0.5]]) + jac: array([0.]) + message: 'Optimization terminated successfully.' + nfev: 6 + nit: 2 + njev: 3 + status: 0 + success: True + x: array([-1.00000001]) + +# 求函数的零点和方程组的解 + + +```python +import sympy +``` + + +```python +x = sympy.Symbol('x') +solution = sympy.solve(x**2+3*x) +solution +``` + + + [-3, 0] + + +```python +for i in solution: + print(i.evalf()) +``` + + -3.00000000000000 + 0 + +```python +x,y = sympy.symbols('x y') +``` + + +```python +s = sympy.solve((x+y-2,x-y)) +s +``` + + + {x: 1, y: 1} + + +```python +optimize.root(lambda x:x**2+3*x,x0=-4) +``` + + + fjac: array([[-1.]]) + fun: array([0.]) + message: 'The solution converged.' 
+ nfev: 9 + qtf: array([-2.04813944e-12]) + r: array([3.00000001]) + status: 1 + success: True + x: array([-3.]) + +# 微分方程 + + +```python +from sympy import * +``` + + +```python +y = Function('y') +x = symbols('x') +``` + + +```python +eq = Eq(diff(diff(y(x),x),x)-(1+diff(y(x),x)),0) +eq +``` + +$$ +\displaystyle - \frac{d}{d x} y{\left(x \right)} + \frac{d^{2}}{d x^{2}} y{\left(x \right)} - 1 = 0 +$$ + + +```python +dsolve(eq) +``` + +$$ +\displaystyle y{\left(x \right)} = C_{1} + C_{2} e^{x} - x +$$ diff --git "a/docs/views/data/MySQL\345\222\214\346\225\260\346\215\256\345\217\257\350\247\206\345\214\226.md" "b/docs/views/data/MySQL\345\222\214\346\225\260\346\215\256\345\217\257\350\247\206\345\214\226.md" new file mode 100644 index 00000000..2462e98c --- /dev/null +++ "b/docs/views/data/MySQL\345\222\214\346\225\260\346\215\256\345\217\257\350\247\206\345\214\226.md" @@ -0,0 +1,230 @@ +--- +layout: post +title: MySQL和数据可视化 +date: 2021-11-19 +author: 饶翰宇 +categories: + - 数据分析部 +tags: + - MySQL + - Python + - 数据可视化 +--- + +## week2 + +------ + +### MySQL + +1. sql基本语法 + +- 显示当前数据库 + +```mysql +show databases; +``` + +- 创建数据库 + +```mysql +create database blocks; +``` + +- 使用数据库 + +```mysql +use blocks; +``` + +- 创建表 + +```mysql +create table person(id int(10), + name varchar(15), + age int(3)); +``` + ++ 查看表结构 + +```mysql +desc person; +``` + +- 显示当前已创建的表 + +```mysql +show tables; +``` + +- 在表中插入值 + +```mysql +insert into person(id,name,age) values(1,'华强',33); +insert into person(id,name,age) values(2,'胖虎',15); +insert into person(id,name,age) values(3,'大司马',37); +insert into person(id,name,age) values(4,NULL,1); +``` + +2. 基础查询 + +- 查询全部 + +```mysql +select * from person; +``` + + + +!['person'](https://i.loli.net/2021/11/21/bwSWyJL6jd3vFkc.png 'person') + +- 查询指定列 + +```mysql +select id,name from person; +``` + +- 查询常量 + +```mysql +select 100; +select '大熊'; +``` + +- 查询表达式 + +```mysql +select 100*100; +``` + +- 查询函数 + +```mysql +select version(); +``` + +3. 
其他操作 + +- 起别名 + +```mysql +select name as '姓名' from person; +``` + +- concat连接 + +```mysql +select concat(id,name) as info from person; +``` + +- ifnull + +```mysql +select id,ifnull(name,0) from person; +``` + +4. 条件查询 + +- where + +```mysql +select * from person where name = '华强'; +``` + +5. 模糊查询 + +- like + +```mysql +select * from person where name like '_强'; +select * from person where name like '大%'; +``` + +- in + +```mysql +select * from person where name in ('华强'); +``` + +- between and + +```mysql +select * from person where id between 1 and 2; +``` + +- is null(is not null) + +```mysql +select * from person where id is null; +``` + + + +### 数据可视化 + +通过python我们可以实现大量数据的可视化,这里我们运用pandas对鸢尾花数据集进行演示; + +数据处理部分: + +```python +from sklearn.datasets import load_iris +import pandas as pd +``` + +```python +dataset = load_iris() +data = pd.DataFrame(data=dataset.data,columns=dataset.feature_names) +data['species'] = dataset.target_names[dataset.target] +``` + + + +![屏幕截图 2021-11-19 170510.png](https://i.loli.net/2021/11/22/UfxoFcsHlRu3qkQ.png '鸢尾花') + +画图处理部分: + +```python +import matplotlib.pyplot as plt +plt.rcParams['font.sans-serif'] = ['SimHei'] +``` + +对类别数量进行计算: + +```python +info = data.groupby('species').groups +count = {name:len(num) for name,num in info.items()} +``` + +结果: + +```python +{'setosa': 50, 'versicolor': 50, 'virginica': 50} +``` + +开始画图: + +```python +plt.figure(figsize=(8,8)) +plt.pie(count.values(),labels=count.keys(),explode=[0.05]*3,colors=['purple','pink','yellow']) +plt.legend(loc='upper right') +plt.title('鸢尾花数据集',fontsize=30,color='black',bbox={'facecolor':'white','pad':5},loc='center') +``` + + + +![屏幕截图 2021-11-19 173955.png](https://i.loli.net/2021/11/22/oQi9pVt1nfcrEeI.png '鸢尾花数据集') + +对每个特征值进行处理并画图: + +```python +plt.figure(figsize=(8,8)) +plt.style.use('ggplot') +for i in range(1,5): + plt.subplot(2,2,i) + plt.hist(data.iloc[:,i-1],10,edgecolor='k') + plt.title(data.columns[i-1]) +``` + + + +![屏幕截图 2021-11-19 
180222.png](https://i.loli.net/2021/11/22/LFPI9DuHxlE5d7G.png '数据可视化')