Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1,896 changes: 1,896 additions & 0 deletions scout/supporting_data/base_disagg/2024_Convert_All.ipynb

Large diffs are not rendered by default.

3,143 changes: 3,143 additions & 0 deletions scout/supporting_data/base_disagg/scout_geography.csv

Large diffs are not rendered by default.

230 changes: 230 additions & 0 deletions scout/supporting_data/base_disagg/test01_factors_differences.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b5155e57-6bd6-46ed-9eee-d397c387b5e9",
"metadata": {},
"source": [
"### FIND DIFFERENCE BETWEEN 2024 and 2023 FACTORS"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb0bd8b5-53a7-4ddd-b842-c634d8ec666c",
"metadata": {},
"outputs": [],
"source": [
"## Absolute difference: files under 2024_end_use/ minus the same-named files under 2023/.\n",
"## One CSV per input file is written to ./output/pctdiff/absdiff_<filename>.\n",
"import os\n",
"\n",
"import pandas as pd\n",
"\n",
"filedir = \"./output/\"\n",
"\n",
"filenames = [\"Com_Cdiv_State_amy2018.csv\", \"Com_Cdiv_EMM_amy2018.csv\",\n",
"             \"Res_Cdiv_EMM_amy2018.csv\", \"Res_Cdiv_State_amy2018.csv\"]\n",
"\n",
"# DataFrame.to_csv does not create missing directories, so make sure the target exists.\n",
"os.makedirs(f\"{filedir}pctdiff\", exist_ok=True)\n",
"\n",
"for filename in filenames:\n",
"    file1_path = f\"{filedir}2023/{filename}\"\n",
"    file2_path = f\"{filedir}2024_end_use/{filename}\"\n",
"    df1 = pd.read_csv(file1_path)\n",
"    df2 = pd.read_csv(file2_path)\n",
"\n",
"    # Rows of the two files are paired purely by position after sorting on the key columns.\n",
"    sorted_df1 = df1.sort_values(by=[\"End use\", \"CDIV\"]).reset_index(drop=True)\n",
"    sorted_df2 = df2.sort_values(by=[\"End use\", \"CDIV\"]).reset_index(drop=True)\n",
"    if len(sorted_df1) != len(sorted_df2):\n",
"        print(f\"WARNING: {filename}: {len(sorted_df1)} rows in 2023 vs {len(sorted_df2)} rows in 2024_end_use; rows are paired by position\")\n",
"\n",
"    # Only columns present in both files are compared.\n",
"    common_columns = sorted_df1.columns.intersection(sorted_df2.columns)\n",
"    df1_common = sorted_df1[common_columns]\n",
"    df2_common = sorted_df2[common_columns]\n",
"    df1_numeric = df1_common.select_dtypes(include=\"number\")\n",
"    df2_numeric = df2_common.select_dtypes(include=\"number\")\n",
"\n",
"    # fill_value=0: a value missing on only one side is treated as 0.\n",
"    absolute_difference_df = df2_numeric.subtract(df1_numeric, fill_value=0)\n",
"    # Drop the first numeric column from the differences\n",
"    # (presumably the numeric CDIV key -- TODO confirm against the CSV schema).\n",
"    absolute_difference_df_from_third_column = absolute_difference_df.iloc[:, 1:]\n",
"\n",
"    # Re-attach the first two common columns of the 2023 frame as row labels.\n",
"    df_with_first_two_columns = pd.concat([df1_common.iloc[:, :2], absolute_difference_df_from_third_column], axis=1)\n",
"    df_with_first_two_columns = df_with_first_two_columns.fillna(0)\n",
"\n",
"    output_path = f\"{filedir}pctdiff/absdiff_{filename}\"\n",
"    df_with_first_two_columns.to_csv(output_path, index=False)\n",
"    print(\"COMPLETE\")\n",
"\n",
"#### ResStock, number of homes; ComStock, square footage\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "85c03de3-d185-49ec-b27a-85e4bc62e91d",
"metadata": {},
"outputs": [],
"source": [
"## Relative difference: (2024_end_use - 2023) / 2023 for same-named files.\n",
"## Values are written as fractions (not multiplied by 100) to ./output/pctdiff/pctdiff_<filename>.\n",
"import os\n",
"\n",
"import pandas as pd\n",
"\n",
"filedir = \"./output/\"\n",
"\n",
"filenames = [\"Com_Cdiv_State_amy2018.csv\", \"Com_Cdiv_EMM_amy2018.csv\",\n",
"             \"Res_Cdiv_EMM_amy2018.csv\", \"Res_Cdiv_State_amy2018.csv\"]\n",
"\n",
"# DataFrame.to_csv does not create missing directories, so make sure the target exists.\n",
"os.makedirs(f\"{filedir}pctdiff\", exist_ok=True)\n",
"\n",
"for filename in filenames:\n",
"    file1_path = f\"{filedir}2023/{filename}\"\n",
"    file2_path = f\"{filedir}2024_end_use/{filename}\"\n",
"    df1 = pd.read_csv(file1_path)\n",
"    df2 = pd.read_csv(file2_path)\n",
"\n",
"    # Rows of the two files are paired purely by position after sorting on the key columns.\n",
"    sorted_df1 = df1.sort_values(by=[\"End use\", \"CDIV\"]).reset_index(drop=True)\n",
"    sorted_df2 = df2.sort_values(by=[\"End use\", \"CDIV\"]).reset_index(drop=True)\n",
"    if len(sorted_df1) != len(sorted_df2):\n",
"        print(f\"WARNING: {filename}: {len(sorted_df1)} rows in 2023 vs {len(sorted_df2)} rows in 2024_end_use; rows are paired by position\")\n",
"\n",
"    # Only columns present in both files are compared.\n",
"    common_columns = sorted_df1.columns.intersection(sorted_df2.columns)\n",
"    df1_common = sorted_df1[common_columns]\n",
"    df2_common = sorted_df2[common_columns]\n",
"    df1_numeric = df1_common.select_dtypes(include=\"number\")\n",
"    df2_numeric = df2_common.select_dtypes(include=\"number\")\n",
"\n",
"    # A value missing on only one side is replaced by 0 for the subtraction and by 1 for the division.\n",
"    # NOTE: a 2023 value of 0 with a nonzero change gives +/-inf, which the fillna(0) below leaves in place.\n",
"    percentage_difference_df = df2_numeric.subtract(df1_numeric, fill_value=0).divide(df1_numeric, fill_value=1)\n",
"    # Drop the first numeric column from the result\n",
"    # (presumably the numeric CDIV key -- TODO confirm against the CSV schema).\n",
"    percentage_difference_df_from_third_column = percentage_difference_df.iloc[:, 1:]\n",
"\n",
"    # Re-attach the first two common columns of the 2023 frame as row labels.\n",
"    df_with_first_two_columns = pd.concat([df1_common.iloc[:, :2], percentage_difference_df_from_third_column], axis=1)\n",
"    df_with_first_two_columns = df_with_first_two_columns.fillna(0)\n",
"\n",
"    output_path = f\"{filedir}pctdiff/pctdiff_{filename}\"\n",
"    df_with_first_two_columns.to_csv(output_path, index=False)\n",
"    print(\"COMPLETE\")\n",
"\n",
"#### ResStock, number of homes; ComStock, square footage\n"
]
},
{
"cell_type": "markdown",
"id": "3b3725cd-ba72-459f-a62a-599c7572cbad",
"metadata": {},
"source": [
"### End use and Stock"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79cb7df7-1b26-470f-a32c-fa083fcbedf0",
"metadata": {},
"outputs": [],
"source": [
"## Relative difference between each 2024 \"*_Stock.csv\" file and its end-use counterpart:\n",
"## (stock - end use) / end use, both read from ./output/2024_end_use/.\n",
"## Values are written as fractions (not multiplied by 100) to ./output/pctdiff/pctdiff_stock_<filename>.\n",
"import os\n",
"\n",
"import pandas as pd\n",
"\n",
"filedir = \"./output/\"\n",
"\n",
"filenames = [\"Com_Cdiv_State_amy2018.csv\", \"Com_Cdiv_EMM_amy2018.csv\",\n",
"             \"Res_Cdiv_EMM_amy2018.csv\", \"Res_Cdiv_State_amy2018.csv\"]\n",
"\n",
"# DataFrame.to_csv does not create missing directories, so make sure the target exists.\n",
"os.makedirs(f\"{filedir}pctdiff\", exist_ok=True)\n",
"\n",
"for filename in filenames:\n",
"    file_stock = filename.replace(\".csv\", \"_Stock.csv\")\n",
"    file1_path = f\"{filedir}2024_end_use/{filename}\"\n",
"    file2_path = f\"{filedir}2024_end_use/{file_stock}\"\n",
"    df1 = pd.read_csv(file1_path)\n",
"    df2 = pd.read_csv(file2_path)\n",
"\n",
"    # Rows of the two files are paired purely by position after sorting on the key columns.\n",
"    sorted_df1 = df1.sort_values(by=[\"End use\", \"CDIV\"]).reset_index(drop=True)\n",
"    sorted_df2 = df2.sort_values(by=[\"End use\", \"CDIV\"]).reset_index(drop=True)\n",
"    if len(sorted_df1) != len(sorted_df2):\n",
"        print(f\"WARNING: {filename} has {len(sorted_df1)} rows but {file_stock} has {len(sorted_df2)} rows; rows are paired by position\")\n",
"\n",
"    # Only columns present in both files are compared.\n",
"    common_columns = sorted_df1.columns.intersection(sorted_df2.columns)\n",
"    df1_common = sorted_df1[common_columns]\n",
"    df2_common = sorted_df2[common_columns]\n",
"    df1_numeric = df1_common.select_dtypes(include=\"number\")\n",
"    df2_numeric = df2_common.select_dtypes(include=\"number\")\n",
"\n",
"    # A value missing on only one side is replaced by 0 for the subtraction and by 1 for the division.\n",
"    # NOTE: an end-use value of 0 with a nonzero change gives +/-inf, which the fillna(0) below leaves in place.\n",
"    percentage_difference_df = df2_numeric.subtract(df1_numeric, fill_value=0).divide(df1_numeric, fill_value=1)\n",
"    # Drop the first numeric column from the result\n",
"    # (presumably the numeric CDIV key -- TODO confirm against the CSV schema).\n",
"    percentage_difference_df_from_third_column = percentage_difference_df.iloc[:, 1:]\n",
"\n",
"    # Re-attach the first two common columns of the end-use frame as row labels.\n",
"    df_with_first_two_columns = pd.concat([df1_common.iloc[:, :2], percentage_difference_df_from_third_column], axis=1)\n",
"    df_with_first_two_columns = df_with_first_two_columns.fillna(0)\n",
"\n",
"    output_path = f\"{filedir}pctdiff/pctdiff_stock_{filename}\"\n",
"    df_with_first_two_columns.to_csv(output_path, index=False)\n",
"    print(\"COMPLETE\")\n",
"\n",
"#### ResStock, number of homes; ComStock, square footage\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b14d3f76-0dc4-4a9d-be0b-375d8e2e053b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading