ML/krasnodar_data.ipynb
2023-02-18 21:47:37 +07:00

1 line
11 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"mount_file_id":"11qi0Zs-HQZhJFvclA7OVLT8cqojQf2aI","authorship_tag":"ABX9TyNkG4DRTbevw8ODM0d/AIu/"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","source":["import pandas as pd\n","import numpy as np"],"metadata":{"id":"1wCgQ3uby0j8","executionInfo":{"status":"ok","timestamp":1676663619141,"user_tz":480,"elapsed":787,"user":{"displayName":"Радмир Зосимов","userId":"04742503366191314986"}}},"execution_count":1,"outputs":[]},{"cell_type":"code","source":["# Load the raw spreadsheet export and normalise the column types.\n","df = pd.read_excel('/content/krasnodar_hack_data.xlsx')\n","\n","# Drop the first and last character of each raw date string, then parse it.\n","# The vectorised .str slice leaves a missing date as NaN (parsed to NaT)\n","# instead of raising on a non-subscriptable float.\n","df['date'] = pd.to_datetime(df['date'].str[1:-1])\n","\n","# Collapse doubled hyphens ('--' -> '-'). na_action='ignore' keeps missing\n","# values as NaN; calling str() on them would store the literal text 'nan'.\n","df['floor'] = df['floor'].map(lambda x: str(x).replace('--', '-'), na_action='ignore')\n","df['liter_num'] = df['liter_num'].map(lambda x: str(x).replace('--', '-'), na_action='ignore')\n","\n","# Strip non-breaking and regular spaces from the price strings before the int cast.\n","df['price_per_m'] = df['price_per_m'].str.replace('\\xa0', '', regex=False).str.replace(' ', '', regex=False).astype(int)\n","df['num_beds'] = df['num_beds'].astype(int)\n","\n","# Decimal comma -> decimal point. A missing area becomes the text 'nan' here,\n","# which the float cast turns back into NaN.\n","df['area'] = df['area'].astype(str).str.replace(',', '.', regex=False).astype(float)\n","df"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"zYk8ByTx8DXL","executionInfo":{"status":"ok","timestamp":1676664071326,"user_tz":480,"elapsed":424,"user":{"displayName":"Радмир Зосимов","userId":"04742503366191314986"}},"outputId":"909d53dd-a9a5-4185-dda0-49fd511f3055"},"execution_count":22,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" community liter_num done_date developer floor \\\n","0 ЖК «Смородина» 1-5 1 квартал 2023 ООО «АЛЬФА» 1-4 \n","1 ЖК «Смородина» 1-5 1 квартал 2023 ООО «АЛЬФА» 5-8 \n","2 ЖК «Смородина» 1-5 1 квартал 2023 ООО «АЛЬФА» 9-12 \n","3 ЖК «Смородина» 1-5 1 квартал 2023 ООО «АЛЬФА» 13-16 \n","4 ЖК «Смородина» 1-5 1 квартал 2023 ООО «АЛЬФА» 1-4 \n","... ... ... ... ... ... 
\n","1284 ЖК «Role Clef» nan 2 полугодие 2021 ООО «Уютный дом» 2-12 \n","1285 ЖК «Role Clef» nan 2 полугодие 2021 ООО «Уютный дом» 2-12 \n","1286 ЖК «Role Clef» nan 2 полугодие 2021 ООО «Уютный дом» 2-12 \n","1287 ЖК «Мелодия» nan дом сдан NaN 5 \n","1288 ЖК «Мелодия» nan дом сдан NaN 11 \n","\n"," area num_beds price_per_m date \n","0 32.95 1 118000 2022-01-25 \n","1 32.95 1 119000 2022-01-25 \n","2 32.95 1 120000 2022-01-25 \n","3 32.95 1 121000 2022-01-25 \n","4 35.20 1 117000 2022-01-25 \n","... ... ... ... ... \n","1284 NaN 1 586000 2023-02-15 \n","1285 NaN 2 400000 2023-02-15 \n","1286 NaN 3 395000 2023-02-15 \n","1287 NaN 2 94000 2023-02-15 \n","1288 NaN 2 94000 2023-02-15 \n","\n","[1289 rows x 9 columns]"],"text/html":["\n"," <div id=\"df-51bcb803-24d8-4839-9267-3795b41713f9\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>community</th>\n"," <th>liter_num</th>\n"," <th>done_date</th>\n"," <th>developer</th>\n"," <th>floor</th>\n"," <th>area</th>\n"," <th>num_beds</th>\n"," <th>price_per_m</th>\n"," <th>date</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>ЖК «Смородина»</td>\n"," <td>1-5</td>\n"," <td>1 квартал 2023</td>\n"," <td>ООО «АЛЬФА»</td>\n"," <td>1-4</td>\n"," <td>32.95</td>\n"," <td>1</td>\n"," <td>118000</td>\n"," <td>2022-01-25</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>ЖК «Смородина»</td>\n"," <td>1-5</td>\n"," <td>1 квартал 2023</td>\n"," <td>ООО «АЛЬФА»</td>\n"," <td>5-8</td>\n"," <td>32.95</td>\n"," <td>1</td>\n"," <td>119000</td>\n"," <td>2022-01-25</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>ЖК «Смородина»</td>\n"," <td>1-5</td>\n"," 
<td>1 квартал 2023</td>\n"," <td>ООО «АЛЬФА»</td>\n"," <td>9-12</td>\n"," <td>32.95</td>\n"," <td>1</td>\n"," <td>120000</td>\n"," <td>2022-01-25</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>ЖК «Смородина»</td>\n"," <td>1-5</td>\n"," <td>1 квартал 2023</td>\n"," <td>ООО «АЛЬФА»</td>\n"," <td>13-16</td>\n"," <td>32.95</td>\n"," <td>1</td>\n"," <td>121000</td>\n"," <td>2022-01-25</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>ЖК «Смородина»</td>\n"," <td>1-5</td>\n"," <td>1 квартал 2023</td>\n"," <td>ООО «АЛЬФА»</td>\n"," <td>1-4</td>\n"," <td>35.20</td>\n"," <td>1</td>\n"," <td>117000</td>\n"," <td>2022-01-25</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>1284</th>\n"," <td>ЖК «Role Clef»</td>\n"," <td>nan</td>\n"," <td>2 полугодие 2021</td>\n"," <td>ООО «Уютный дом»</td>\n"," <td>2-12</td>\n"," <td>NaN</td>\n"," <td>1</td>\n"," <td>586000</td>\n"," <td>2023-02-15</td>\n"," </tr>\n"," <tr>\n"," <th>1285</th>\n"," <td>ЖК «Role Clef»</td>\n"," <td>nan</td>\n"," <td>2 полугодие 2021</td>\n"," <td>ООО «Уютный дом»</td>\n"," <td>2-12</td>\n"," <td>NaN</td>\n"," <td>2</td>\n"," <td>400000</td>\n"," <td>2023-02-15</td>\n"," </tr>\n"," <tr>\n"," <th>1286</th>\n"," <td>ЖК «Role Clef»</td>\n"," <td>nan</td>\n"," <td>2 полугодие 2021</td>\n"," <td>ООО «Уютный дом»</td>\n"," <td>2-12</td>\n"," <td>NaN</td>\n"," <td>3</td>\n"," <td>395000</td>\n"," <td>2023-02-15</td>\n"," </tr>\n"," <tr>\n"," <th>1287</th>\n"," <td>ЖК «Мелодия»</td>\n"," <td>nan</td>\n"," <td>дом сдан</td>\n"," <td>NaN</td>\n"," <td>5</td>\n"," <td>NaN</td>\n"," <td>2</td>\n"," <td>94000</td>\n"," <td>2023-02-15</td>\n"," </tr>\n"," <tr>\n"," <th>1288</th>\n"," <td>ЖК «Мелодия»</td>\n"," <td>nan</td>\n"," <td>дом сдан</td>\n"," <td>NaN</td>\n"," <td>11</td>\n"," <td>NaN</td>\n"," <td>2</td>\n"," <td>94000</td>\n"," 
<td>2023-02-15</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>1289 rows × 9 columns</p>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-51bcb803-24d8-4839-9267-3795b41713f9')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-51bcb803-24d8-4839-9267-3795b41713f9 button.colab-df-convert');\n"," 
buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-51bcb803-24d8-4839-9267-3795b41713f9');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":22}]},{"cell_type":"code","source":["# Write the cleaned frame to CSV (path is relative to the working directory), omitting the index column.\n","df.to_csv(path_or_buf='krasnodar_data_final.csv', index=False)"],"metadata":{"id":"v7B0OICi8LOx","executionInfo":{"status":"ok","timestamp":1676664099279,"user_tz":480,"elapsed":119,"user":{"displayName":"Радмир Зосимов","userId":"04742503366191314986"}}},"execution_count":23,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"id":"UNgPSTGM8Ypj"},"execution_count":null,"outputs":[]}]}