In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import seaborn as sns
import sklearn as sk
In [2]:
# Global seaborn look for the figures in this notebook: white-grid style and
# the 'flare_r' palette as the default colour cycle.
sns.set_style(style='whitegrid')
sns.set_palette(palette='flare_r')
# Trailing expression on purpose: the notebook renders the palette as the cell output.
sns.color_palette(palette='flare_r')
Out[2]:
In [3]:
# Full treated dataset, stored as a semicolon-separated CSV.
# NOTE(review): the file carries a saved index that is loaded as the
# 'Unnamed: 0' column (visible in the previews further down).
df_all = pd.read_csv(
    filepath_or_buffer='Dados Tratados/output_geral.csv',
    sep=';',
)
In [4]:
# Subset stored in output_owners_min.csv (semicolon-separated).
# In the summary statistics shown later, owners_mean is constant at 10000 here.
df_min = pd.read_csv(
    filepath_or_buffer='Dados Tratados/output_owners_min.csv',
    sep=';',
)
In [5]:
# Subset stored in output_owners.csv (semicolon-separated).
# In the summary statistics shown later, owners_mean starts at 35000 here,
# i.e. these are the rows that are not in df_min.
df_max = pd.read_csv(
    filepath_or_buffer='Dados Tratados/output_owners.csv',
    sep=';',
)
In [6]:
# Every column of the treated CSVs except the saved index ('Unnamed: 0'),
# listed in the same order as they appear in the files.
Colunas = [
    # --- identifiers and general metadata ---
    'appid',
    'name',
    'Year',
    'english',
    # --- platform indicators ---
    'platforms_windows',
    'platforms_mac',
    'platforms_linux',
    # --- genre indicators ---
    'genres_Action',
    'genres_Free to Play',
    'genres_Strategy',
    'genres_Adventure',
    'genres_Indie',
    'genres_RPG',
    'genres_Casual',
    'genres_Simulation',
    'genres_Racing',
    'genres_Violent',
    'genres_Massively Multiplayer',
    'genres_Nudity',
    'genres_Sports',
    'genres_Early Access',
    'genres_Gore',
    'genres_Sexual Content',
    # --- store category indicators ---
    'categories_Multi-player',
    'categories_Online Multi-Player',
    'categories_Local Multi-Player',
    'categories_Valve Anti-Cheat enabled',
    'categories_Single-player',
    'categories_Steam Cloud',
    'categories_Steam Achievements',
    'categories_Steam Trading Cards',
    'categories_Captions available',
    'categories_Partial Controller Support',
    'categories_Includes Source SDK',
    'categories_Cross-Platform Multiplayer',
    'categories_Stats',
    'categories_Commentary available',
    'categories_Includes level editor',
    'categories_Steam Workshop',
    'categories_In-App Purchases',
    'categories_Co-op',
    'categories_Full controller support',
    'categories_Steam Leaderboards',
    'categories_SteamVR Collectibles',
    'categories_Online Co-op',
    'categories_Shared/Split Screen',
    'categories_Local Co-op',
    'categories_MMO',
    'categories_VR Support',
    'categories_Mods',
    'categories_Mods (require HL2)',
    'categories_Steam Turn Notifications',
    # --- engagement / popularity measures ---
    'achievements',
    '%_ratings',
    'QTD_ratings',
    'owners_mean',
    'average_playtime_>0',
    # --- price and its transformed versions ---
    'price',
    'log_price',
    'log_log_price',
]
In [7]:
# Regressors for the price models: every entry of Colunas except the
# identifiers (appid, name) and the price columns (price, log_price,
# log_log_price). Derived from Colunas instead of repeating the 55 names by
# hand, so the two lists cannot drift apart; the order follows Colunas.
_COLUNAS_FORA_DO_MODELO = {'appid', 'name', 'price', 'log_price', 'log_log_price'}
Colunas_Tratada = [coluna for coluna in Colunas if coluna not in _COLUNAS_FORA_DO_MODELO]
In [8]:
# Preview the first rows of the full dataset.
df_all.head()
Out[8]:
Unnamed: 0 appid name Year english platforms_windows platforms_mac platforms_linux genres_Action genres_Free to Play ... categories_Mods (require HL2) categories_Steam Turn Notifications achievements %_ratings QTD_ratings owners_mean average_playtime_>0 price log_price log_log_price
0 0 10 Counter-Strike 2000 1 1 1 1 1 0 ... 0 0 0 0.973888 127873 15000000.0 1 7.19 2.102914 1.132342
1 1 20 Team Fortress Classic 1999 1 1 1 1 1 0 ... 0 0 0 0.839787 3951 7500000.0 1 3.99 1.607436 0.958367
2 2 30 Day of Defeat 2003 1 1 1 1 1 0 ... 0 0 0 0.895648 3814 7500000.0 1 3.99 1.607436 0.958367
3 3 40 Deathmatch Classic 2001 1 1 1 1 1 0 ... 0 0 0 0.826623 1540 7500000.0 1 3.99 1.607436 0.958367
4 4 50 Half-Life: Opposing Force 1999 1 1 1 1 1 0 ... 0 0 0 0.947996 5538 7500000.0 1 3.99 1.607436 0.958367

5 rows × 61 columns

In [9]:
# Preview the first rows of the df_min subset.
df_min.head()
Out[9]:
Unnamed: 0 appid name Year english platforms_windows platforms_mac platforms_linux genres_Action genres_Free to Play ... categories_Mods (require HL2) categories_Steam Turn Notifications achievements %_ratings QTD_ratings owners_mean average_playtime_>0 price log_price log_log_price
0 122 4100 Poker Superstars II 2006 1 1 0 0 0 0 ... 0 0 0 0.444444 9 10000.0 0 5.99 1.944481 1.079932
1 124 4290 RACE: Caterham Expansion 2007 1 1 0 0 0 0 ... 0 0 0 0.666667 6 10000.0 0 3.99 1.607436 0.958367
2 145 4900 Zen of Sudoku 2006 1 1 0 0 0 0 ... 0 0 0 0.877698 139 10000.0 0 0.00 0.000000 0.000000
3 188 7340 Azada 2010 1 1 0 0 0 0 ... 0 0 0 0.666667 12 10000.0 0 6.99 2.078191 1.124342
4 248 9960 Haunted House™ 2010 1 1 0 0 0 0 ... 0 0 0 0.409091 22 10000.0 0 4.79 1.756132 1.013828

5 rows × 61 columns

In [10]:
# Preview the first rows of the df_max subset.
df_max.head()
Out[10]:
Unnamed: 0 appid name Year english platforms_windows platforms_mac platforms_linux genres_Action genres_Free to Play ... categories_Mods (require HL2) categories_Steam Turn Notifications achievements %_ratings QTD_ratings owners_mean average_playtime_>0 price log_price log_log_price
0 0 10 Counter-Strike 2000 1 1 1 1 1 0 ... 0 0 0 0.973888 127873 15000000.0 1 7.19 2.102914 1.132342
1 1 20 Team Fortress Classic 1999 1 1 1 1 1 0 ... 0 0 0 0.839787 3951 7500000.0 1 3.99 1.607436 0.958367
2 2 30 Day of Defeat 2003 1 1 1 1 1 0 ... 0 0 0 0.895648 3814 7500000.0 1 3.99 1.607436 0.958367
3 3 40 Deathmatch Classic 2001 1 1 1 1 1 0 ... 0 0 0 0.826623 1540 7500000.0 1 3.99 1.607436 0.958367
4 4 50 Half-Life: Opposing Force 1999 1 1 1 1 1 0 ... 0 0 0 0.947996 5538 7500000.0 1 3.99 1.607436 0.958367

5 rows × 61 columns

In [11]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns of df_all.
df_all.describe()
Out[11]:
Unnamed: 0 appid Year english platforms_windows platforms_mac platforms_linux genres_Action genres_Free to Play genres_Strategy ... categories_Mods (require HL2) categories_Steam Turn Notifications achievements %_ratings QTD_ratings owners_mean average_playtime_>0 price log_price log_log_price
count 26846.000000 2.684600e+04 26846.000000 26846.000000 26846.000000 26846.000000 26846.000000 26846.000000 26846.000000 26846.000000 ... 26846.000000 26846.000000 26846.000000 26846.000000 2.684600e+04 2.684600e+04 26846.000000 26846.000000 26846.000000 26846.000000
mean 13537.970834 5.961759e+05 2016.496908 0.981040 0.999814 0.299113 0.194256 0.443306 0.063175 0.195374 ... 0.000037 0.002347 45.613536 0.714424 1.217097e+03 1.344908e+05 0.228600 5.999415 1.585724 0.882937
std 7817.936144 2.510454e+05 2.161338 0.136386 0.013646 0.457878 0.395634 0.496785 0.243282 0.396495 ... 0.006103 0.048387 354.148334 0.233717 2.252065e+04 1.332872e+06 0.419939 7.244335 0.864164 0.387937
min 0.000000 1.000000e+01 1997.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 1.000000e+00 1.000000e+04 0.000000 0.000000 0.000000 0.000000
25% 6764.250000 4.011325e+05 2016.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.583333 1.000000e+01 1.000000e+04 0.000000 1.690000 0.989541 0.687904
50% 13543.500000 5.992850e+05 2017.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 7.000000 0.760435 3.600000e+01 1.000000e+04 0.000000 3.990000 1.607436 0.958367
75% 20310.750000 7.989175e+05 2018.000000 1.000000 1.000000 1.000000 0.000000 1.000000 0.000000 0.000000 ... 0.000000 0.000000 23.000000 0.893838 1.770000e+02 3.500000e+04 0.000000 7.190000 2.102914 1.132342
max 27074.000000 1.069460e+06 2019.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 ... 1.000000 1.000000 9821.000000 1.000000 3.046717e+06 1.500000e+08 1.000000 421.990000 6.047349 1.952651

8 rows × 60 columns

In [12]:
# Summary statistics of the numeric columns of df_min.
df_min.describe()
Out[12]:
Unnamed: 0 appid Year english platforms_windows platforms_mac platforms_linux genres_Action genres_Free to Play genres_Strategy ... categories_Mods (require HL2) categories_Steam Turn Notifications achievements %_ratings QTD_ratings owners_mean average_playtime_>0 price log_price log_log_price
count 18433.000000 1.843300e+04 18433.000000 18433.000000 18433.000000 18433.000000 18433.000000 18433.000000 18433.000000 18433.000000 ... 18433.0 18433.000000 18433.000000 18433.000000 18433.000000 18433.0 18433.000000 18433.000000 18433.000000 18433.000000
mean 16271.622525 6.847525e+05 2017.195682 0.976781 0.999783 0.259372 0.161395 0.435740 0.027776 0.182065 ... 0.0 0.002062 46.889546 0.707029 40.729290 10000.0 0.041827 5.501475 1.570516 0.890951
std 6881.558945 2.120956e+05 1.388686 0.150603 0.014730 0.438302 0.367905 0.495867 0.164336 0.385908 ... 0.0 0.045358 381.471008 0.255186 71.600204 0.0 0.200199 6.760558 0.778563 0.342277
min 122.000000 4.100000e+03 2006.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.0 0.000000 0.000000 0.000000 1.000000 10000.0 0.000000 0.000000 0.000000 0.000000
25% 10879.000000 5.254500e+05 2017.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.0 0.000000 0.000000 0.557692 6.000000 10000.0 0.000000 1.690000 0.989541 0.687904
50% 16973.000000 7.035100e+05 2017.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.0 0.000000 5.000000 0.750000 17.000000 10000.0 0.000000 3.990000 1.607436 0.958367
75% 22122.000000 8.544500e+05 2018.000000 1.000000 1.000000 1.000000 0.000000 1.000000 0.000000 0.000000 ... 0.0 0.000000 20.000000 0.910256 42.000000 10000.0 0.000000 7.190000 2.102914 1.132342
max 27074.000000 1.069460e+06 2019.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 ... 0.0 1.000000 5394.000000 1.000000 1880.000000 10000.0 1.000000 421.990000 6.047349 1.952651

8 rows × 60 columns

In [13]:
# Summary statistics of the numeric columns of df_max.
df_max.describe()
Out[13]:
Unnamed: 0 appid Year english platforms_windows platforms_mac platforms_linux genres_Action genres_Free to Play genres_Strategy ... categories_Mods (require HL2) categories_Steam Turn Notifications achievements %_ratings QTD_ratings owners_mean average_playtime_>0 price log_price log_log_price
count 8413.000000 8.413000e+03 8413.000000 8413.000000 8413.000000 8413.000000 8413.000000 8413.000000 8413.000000 8413.000000 ... 8413.000000 8413.000000 8413.000000 8413.000000 8.413000e+03 8.413000e+03 8413.000000 8413.000000 8413.000000 8413.000000
mean 7548.501961 4.021034e+05 2014.965886 0.990372 0.999881 0.386188 0.266255 0.459884 0.140735 0.224533 ... 0.000119 0.002972 42.817782 0.730627 3.794534e+03 4.072519e+05 0.637822 7.090409 1.619045 0.865379
std 6247.718107 2.183967e+05 2.695896 0.097654 0.010902 0.486903 0.442025 0.498418 0.347768 0.417300 ... 0.010902 0.054434 285.279410 0.176774 4.011062e+04 2.358195e+06 0.480658 8.099292 1.026337 0.472357
min 0.000000 1.000000e+01 1997.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 1.000000e+00 3.500000e+04 0.000000 0.000000 0.000000 0.000000
25% 2381.000000 2.682600e+05 2014.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.625569 1.360000e+02 3.500000e+04 0.000000 0.900000 0.641854 0.495826
50% 6021.000000 3.805600e+05 2016.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 13.000000 0.767386 3.980000e+02 7.500000e+04 1.000000 4.790000 1.756132 1.013828
75% 11299.000000 5.369300e+05 2017.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 ... 0.000000 0.000000 33.000000 0.871795 1.386000e+03 3.500000e+05 1.000000 9.990000 2.396986 1.222888
max 27034.000000 1.057690e+06 2019.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 ... 1.000000 1.000000 9821.000000 1.000000 3.046717e+06 1.500000e+08 1.000000 60.990000 4.126973 1.634515

8 rows × 60 columns

In [14]:
# (rows, columns) of the full dataset.
df_all.shape
Out[14]:
(26846, 61)
In [15]:
# (rows, columns) of df_min.
df_min.shape
Out[15]:
(18433, 61)
In [16]:
# (rows, columns) of df_max.
df_max.shape
Out[16]:
(8413, 61)
In [17]:
# Per-column dtype and non-null count for df_all (used to check for missing values).
df_all.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26846 entries, 0 to 26845
Data columns (total 61 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Unnamed: 0                             26846 non-null  int64  
 1   appid                                  26846 non-null  int64  
 2   name                                   26846 non-null  object 
 3   Year                                   26846 non-null  int64  
 4   english                                26846 non-null  int64  
 5   platforms_windows                      26846 non-null  int64  
 6   platforms_mac                          26846 non-null  int64  
 7   platforms_linux                        26846 non-null  int64  
 8   genres_Action                          26846 non-null  int64  
 9   genres_Free to Play                    26846 non-null  int64  
 10  genres_Strategy                        26846 non-null  int64  
 11  genres_Adventure                       26846 non-null  int64  
 12  genres_Indie                           26846 non-null  int64  
 13  genres_RPG                             26846 non-null  int64  
 14  genres_Casual                          26846 non-null  int64  
 15  genres_Simulation                      26846 non-null  int64  
 16  genres_Racing                          26846 non-null  int64  
 17  genres_Violent                         26846 non-null  int64  
 18  genres_Massively Multiplayer           26846 non-null  int64  
 19  genres_Nudity                          26846 non-null  int64  
 20  genres_Sports                          26846 non-null  int64  
 21  genres_Early Access                    26846 non-null  int64  
 22  genres_Gore                            26846 non-null  int64  
 23  genres_Sexual Content                  26846 non-null  int64  
 24  categories_Multi-player                26846 non-null  int64  
 25  categories_Online Multi-Player         26846 non-null  int64  
 26  categories_Local Multi-Player          26846 non-null  int64  
 27  categories_Valve Anti-Cheat enabled    26846 non-null  int64  
 28  categories_Single-player               26846 non-null  int64  
 29  categories_Steam Cloud                 26846 non-null  int64  
 30  categories_Steam Achievements          26846 non-null  int64  
 31  categories_Steam Trading Cards         26846 non-null  int64  
 32  categories_Captions available          26846 non-null  int64  
 33  categories_Partial Controller Support  26846 non-null  int64  
 34  categories_Includes Source SDK         26846 non-null  int64  
 35  categories_Cross-Platform Multiplayer  26846 non-null  int64  
 36  categories_Stats                       26846 non-null  int64  
 37  categories_Commentary available        26846 non-null  int64  
 38  categories_Includes level editor       26846 non-null  int64  
 39  categories_Steam Workshop              26846 non-null  int64  
 40  categories_In-App Purchases            26846 non-null  int64  
 41  categories_Co-op                       26846 non-null  int64  
 42  categories_Full controller support     26846 non-null  int64  
 43  categories_Steam Leaderboards          26846 non-null  int64  
 44  categories_SteamVR Collectibles        26846 non-null  int64  
 45  categories_Online Co-op                26846 non-null  int64  
 46  categories_Shared/Split Screen         26846 non-null  int64  
 47  categories_Local Co-op                 26846 non-null  int64  
 48  categories_MMO                         26846 non-null  int64  
 49  categories_VR Support                  26846 non-null  int64  
 50  categories_Mods                        26846 non-null  int64  
 51  categories_Mods (require HL2)          26846 non-null  int64  
 52  categories_Steam Turn Notifications    26846 non-null  int64  
 53  achievements                           26846 non-null  int64  
 54  %_ratings                              26846 non-null  float64
 55  QTD_ratings                            26846 non-null  int64  
 56  owners_mean                            26846 non-null  float64
 57  average_playtime_>0                    26846 non-null  int64  
 58  price                                  26846 non-null  float64
 59  log_price                              26846 non-null  float64
 60  log_log_price                          26846 non-null  float64
dtypes: float64(5), int64(55), object(1)
memory usage: 12.5+ MB
In [18]:
# Per-column dtype and non-null count for df_min.
df_min.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18433 entries, 0 to 18432
Data columns (total 61 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Unnamed: 0                             18433 non-null  int64  
 1   appid                                  18433 non-null  int64  
 2   name                                   18433 non-null  object 
 3   Year                                   18433 non-null  int64  
 4   english                                18433 non-null  int64  
 5   platforms_windows                      18433 non-null  int64  
 6   platforms_mac                          18433 non-null  int64  
 7   platforms_linux                        18433 non-null  int64  
 8   genres_Action                          18433 non-null  int64  
 9   genres_Free to Play                    18433 non-null  int64  
 10  genres_Strategy                        18433 non-null  int64  
 11  genres_Adventure                       18433 non-null  int64  
 12  genres_Indie                           18433 non-null  int64  
 13  genres_RPG                             18433 non-null  int64  
 14  genres_Casual                          18433 non-null  int64  
 15  genres_Simulation                      18433 non-null  int64  
 16  genres_Racing                          18433 non-null  int64  
 17  genres_Violent                         18433 non-null  int64  
 18  genres_Massively Multiplayer           18433 non-null  int64  
 19  genres_Nudity                          18433 non-null  int64  
 20  genres_Sports                          18433 non-null  int64  
 21  genres_Early Access                    18433 non-null  int64  
 22  genres_Gore                            18433 non-null  int64  
 23  genres_Sexual Content                  18433 non-null  int64  
 24  categories_Multi-player                18433 non-null  int64  
 25  categories_Online Multi-Player         18433 non-null  int64  
 26  categories_Local Multi-Player          18433 non-null  int64  
 27  categories_Valve Anti-Cheat enabled    18433 non-null  int64  
 28  categories_Single-player               18433 non-null  int64  
 29  categories_Steam Cloud                 18433 non-null  int64  
 30  categories_Steam Achievements          18433 non-null  int64  
 31  categories_Steam Trading Cards         18433 non-null  int64  
 32  categories_Captions available          18433 non-null  int64  
 33  categories_Partial Controller Support  18433 non-null  int64  
 34  categories_Includes Source SDK         18433 non-null  int64  
 35  categories_Cross-Platform Multiplayer  18433 non-null  int64  
 36  categories_Stats                       18433 non-null  int64  
 37  categories_Commentary available        18433 non-null  int64  
 38  categories_Includes level editor       18433 non-null  int64  
 39  categories_Steam Workshop              18433 non-null  int64  
 40  categories_In-App Purchases            18433 non-null  int64  
 41  categories_Co-op                       18433 non-null  int64  
 42  categories_Full controller support     18433 non-null  int64  
 43  categories_Steam Leaderboards          18433 non-null  int64  
 44  categories_SteamVR Collectibles        18433 non-null  int64  
 45  categories_Online Co-op                18433 non-null  int64  
 46  categories_Shared/Split Screen         18433 non-null  int64  
 47  categories_Local Co-op                 18433 non-null  int64  
 48  categories_MMO                         18433 non-null  int64  
 49  categories_VR Support                  18433 non-null  int64  
 50  categories_Mods                        18433 non-null  int64  
 51  categories_Mods (require HL2)          18433 non-null  int64  
 52  categories_Steam Turn Notifications    18433 non-null  int64  
 53  achievements                           18433 non-null  int64  
 54  %_ratings                              18433 non-null  float64
 55  QTD_ratings                            18433 non-null  int64  
 56  owners_mean                            18433 non-null  float64
 57  average_playtime_>0                    18433 non-null  int64  
 58  price                                  18433 non-null  float64
 59  log_price                              18433 non-null  float64
 60  log_log_price                          18433 non-null  float64
dtypes: float64(5), int64(55), object(1)
memory usage: 8.6+ MB
In [19]:
# Per-column dtype and non-null count for df_max.
df_max.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8413 entries, 0 to 8412
Data columns (total 61 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Unnamed: 0                             8413 non-null   int64  
 1   appid                                  8413 non-null   int64  
 2   name                                   8413 non-null   object 
 3   Year                                   8413 non-null   int64  
 4   english                                8413 non-null   int64  
 5   platforms_windows                      8413 non-null   int64  
 6   platforms_mac                          8413 non-null   int64  
 7   platforms_linux                        8413 non-null   int64  
 8   genres_Action                          8413 non-null   int64  
 9   genres_Free to Play                    8413 non-null   int64  
 10  genres_Strategy                        8413 non-null   int64  
 11  genres_Adventure                       8413 non-null   int64  
 12  genres_Indie                           8413 non-null   int64  
 13  genres_RPG                             8413 non-null   int64  
 14  genres_Casual                          8413 non-null   int64  
 15  genres_Simulation                      8413 non-null   int64  
 16  genres_Racing                          8413 non-null   int64  
 17  genres_Violent                         8413 non-null   int64  
 18  genres_Massively Multiplayer           8413 non-null   int64  
 19  genres_Nudity                          8413 non-null   int64  
 20  genres_Sports                          8413 non-null   int64  
 21  genres_Early Access                    8413 non-null   int64  
 22  genres_Gore                            8413 non-null   int64  
 23  genres_Sexual Content                  8413 non-null   int64  
 24  categories_Multi-player                8413 non-null   int64  
 25  categories_Online Multi-Player         8413 non-null   int64  
 26  categories_Local Multi-Player          8413 non-null   int64  
 27  categories_Valve Anti-Cheat enabled    8413 non-null   int64  
 28  categories_Single-player               8413 non-null   int64  
 29  categories_Steam Cloud                 8413 non-null   int64  
 30  categories_Steam Achievements          8413 non-null   int64  
 31  categories_Steam Trading Cards         8413 non-null   int64  
 32  categories_Captions available          8413 non-null   int64  
 33  categories_Partial Controller Support  8413 non-null   int64  
 34  categories_Includes Source SDK         8413 non-null   int64  
 35  categories_Cross-Platform Multiplayer  8413 non-null   int64  
 36  categories_Stats                       8413 non-null   int64  
 37  categories_Commentary available        8413 non-null   int64  
 38  categories_Includes level editor       8413 non-null   int64  
 39  categories_Steam Workshop              8413 non-null   int64  
 40  categories_In-App Purchases            8413 non-null   int64  
 41  categories_Co-op                       8413 non-null   int64  
 42  categories_Full controller support     8413 non-null   int64  
 43  categories_Steam Leaderboards          8413 non-null   int64  
 44  categories_SteamVR Collectibles        8413 non-null   int64  
 45  categories_Online Co-op                8413 non-null   int64  
 46  categories_Shared/Split Screen         8413 non-null   int64  
 47  categories_Local Co-op                 8413 non-null   int64  
 48  categories_MMO                         8413 non-null   int64  
 49  categories_VR Support                  8413 non-null   int64  
 50  categories_Mods                        8413 non-null   int64  
 51  categories_Mods (require HL2)          8413 non-null   int64  
 52  categories_Steam Turn Notifications    8413 non-null   int64  
 53  achievements                           8413 non-null   int64  
 54  %_ratings                              8413 non-null   float64
 55  QTD_ratings                            8413 non-null   int64  
 56  owners_mean                            8413 non-null   float64
 57  average_playtime_>0                    8413 non-null   int64  
 58  price                                  8413 non-null   float64
 59  log_price                              8413 non-null   float64
 60  log_log_price                          8413 non-null   float64
dtypes: float64(5), int64(55), object(1)
memory usage: 3.9+ MB

Com isso, conseguimos dar uma boa olhada nos dados que temos à nossa disposição. Confirmamos, também, que a separação proposta no arquivo anterior realmente funcionou.

In [20]:
# Boxplot of the raw price in the full dataset.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.boxplot(data=df_all, y='price')
ax.set_title('price (df_all)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [21]:
# Boxplot of log_price in the full dataset.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.boxplot(data=df_all, y='log_price')
ax.set_title('log_price (df_all)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [22]:
# Boxplot of log_log_price in the full dataset.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.boxplot(data=df_all, y='log_log_price')
ax.set_title('log_log_price (df_all)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [23]:
# Boxplot of the raw price in df_min.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.boxplot(data=df_min, y='price')
ax.set_title('price (df_min)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [24]:
# Boxplot of log_price in df_min.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.boxplot(data=df_min, y='log_price')
ax.set_title('log_price (df_min)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [25]:
# Boxplot of log_log_price in df_min.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.boxplot(data=df_min, y='log_log_price')
ax.set_title('log_log_price (df_min)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [26]:
# Boxplot of the raw price in df_max.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.boxplot(data=df_max, y='price')
ax.set_title('price (df_max)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [27]:
# Boxplot of log_price in df_max.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.boxplot(data=df_max, y='log_price')
ax.set_title('log_price (df_max)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [28]:
# Boxplot of log_log_price in df_max.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.boxplot(data=df_max, y='log_log_price')
ax.set_title('log_log_price (df_max)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [29]:
# Histogram with a KDE overlay of the raw price in the full dataset.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.histplot(data=df_all['price'], kde=True)
ax.set_title('price (df_all)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [30]:
# Histogram with a KDE overlay of log_price in the full dataset.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.histplot(data=df_all['log_price'], kde=True)
ax.set_title('log_price (df_all)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [31]:
# Histogram with a KDE overlay of log_log_price in the full dataset.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.histplot(data=df_all['log_log_price'], kde=True)
ax.set_title('log_log_price (df_all)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [32]:
# Histogram with a KDE overlay of the raw price in df_min.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.histplot(data=df_min['price'], kde=True)
ax.set_title('price (df_min)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [33]:
# Histogram with a KDE overlay of log_price in df_min.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.histplot(data=df_min['log_price'], kde=True)
ax.set_title('log_price (df_min)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [34]:
# Histogram with a KDE overlay of log_log_price in df_min.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.histplot(data=df_min['log_log_price'], kde=True)
ax.set_title('log_log_price (df_min)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [35]:
# Histogram with a KDE overlay of the raw price in df_max.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.histplot(data=df_max['price'], kde=True)
ax.set_title('price (df_max)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [36]:
# Histogram with a KDE overlay of log_price in df_max.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.histplot(data=df_max['log_price'], kde=True)
ax.set_title('log_price (df_max)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)
In [37]:
# Histogram with a KDE overlay of log_log_price in df_max.
# The title names column and frame because the same plot is repeated for
# several columns and frames and would otherwise be indistinguishable.
ax = sns.histplot(data=df_max['log_log_price'], kde=True)
ax.set_title('log_log_price (df_max)')
# Kept as the last statement: it returns None, so the cell shows only the figure.
ax.figure.set_size_inches(15, 6)

É perceptível que nenhuma das distribuições de preço segue o formato de uma normal. Poderíamos utilizar um teste mais robusto, mas, só pelo formato dos histogramas, já sabemos que nenhuma das distribuições está próxima de uma normal.

Como temos muitas variáveis, também fica um pouco difícil analisar separadamente o impacto de cada uma delas no preço. Por isso, acho melhor seguir direto para a regressão linear e utilizar os resultados da própria regressão para refinar o modelo. Particularmente, prefiro a apresentação dos resultados da biblioteca StatsModels; então, primeiro farei os testes nela e depois construirei o modelo de machine learning com a biblioteca sklearn.
Também, para não gerar maiores confusões, vou usar primeiro o dataframe com todos os dados, depois o dataframe com os dados cujo número médio de owners é 10 mil e, por fim, o dataframe com os dados cujo número médio de owners é diferente de 10 mil.

In [38]:
# Separate list object holding the regressors for the full-dataset model;
# presumably so it can be pruned later without altering Colunas_Tratada.
Colunas_all = list(Colunas_Tratada)
In [39]:
# Sanity check: number of regressors selected for the full-dataset model.
len(Colunas_all)
Out[39]:
55
In [40]:
# Dependent variable of the full-dataset regression: the raw (untransformed) price.
y_all = df_all.loc[:, 'price']
In [41]:
# Design matrix: the selected regressors plus the 'const' column that
# statsmodels needs in order to estimate an intercept.
X_all = sm.add_constant(df_all.loc[:, Colunas_all])
In [42]:
# Ordinary least squares fit of price on the regressors for the full dataset.
regressão_all = sm.OLS(endog=y_all, exog=X_all).fit()
In [43]:
# Full regression report: fit statistics plus coefficient, standard error,
# t statistic, p-value and 95% interval for each regressor.
regressão_all.summary()
Out[43]:
OLS Regression Results
Dep. Variable: price R-squared: 0.227
Model: OLS Adj. R-squared: 0.226
Method: Least Squares F-statistic: 143.4
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:55 Log-Likelihood: -87789.
No. Observations: 26846 AIC: 1.757e+05
Df Residuals: 26790 BIC: 1.761e+05
Df Model: 55
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const -398.4477 43.768 -9.104 0.000 -484.235 -312.661
Year 0.1993 0.022 9.205 0.000 0.157 0.242
english 0.2238 0.291 0.769 0.442 -0.346 0.794
platforms_windows 1.0391 2.856 0.364 0.716 -4.560 6.638
platforms_mac 0.1497 0.112 1.338 0.181 -0.070 0.369
platforms_linux -0.3864 0.129 -2.988 0.003 -0.640 -0.133
genres_Action -0.3861 0.088 -4.373 0.000 -0.559 -0.213
genres_Free to Play -6.1307 0.184 -33.382 0.000 -6.491 -5.771
genres_Strategy 0.9648 0.105 9.150 0.000 0.758 1.171
genres_Adventure 0.4093 0.086 4.744 0.000 0.240 0.578
genres_Indie -3.7317 0.095 -39.285 0.000 -3.918 -3.546
genres_RPG 0.5923 0.113 5.252 0.000 0.371 0.813
genres_Casual -2.1332 0.085 -24.955 0.000 -2.301 -1.966
genres_Simulation 1.5896 0.105 15.085 0.000 1.383 1.796
genres_Racing -0.2558 0.215 -1.190 0.234 -0.677 0.166
genres_Violent 0.1569 0.300 0.523 0.601 -0.432 0.745
genres_Massively Multiplayer -1.0994 0.361 -3.049 0.002 -1.806 -0.393
genres_Nudity 1.1908 0.496 2.400 0.016 0.218 2.163
genres_Sports 1.2100 0.193 6.259 0.000 0.831 1.589
genres_Early Access 1.3827 0.134 10.349 0.000 1.121 1.645
genres_Gore -0.1369 0.369 -0.370 0.711 -0.861 0.587
genres_Sexual Content 0.9241 0.513 1.801 0.072 -0.081 1.930
categories_Multi-player 1.3980 0.152 9.198 0.000 1.100 1.696
categories_Online Multi-Player 1.8265 0.201 9.107 0.000 1.433 2.220
categories_Local Multi-Player 0.0646 0.228 0.284 0.776 -0.382 0.511
categories_Valve Anti-Cheat enabled 3.1379 0.707 4.440 0.000 1.753 4.523
categories_Single-player 1.3099 0.224 5.848 0.000 0.871 1.749
categories_Steam Cloud 1.7587 0.102 17.280 0.000 1.559 1.958
categories_Steam Achievements 0.6710 0.096 7.021 0.000 0.484 0.858
categories_Steam Trading Cards 0.5796 0.102 5.679 0.000 0.380 0.780
categories_Captions available 0.0765 0.249 0.307 0.759 -0.412 0.565
categories_Partial Controller Support 0.9660 0.117 8.289 0.000 0.738 1.194
categories_Includes Source SDK -2.8936 1.175 -2.462 0.014 -5.197 -0.590
categories_Cross-Platform Multiplayer -1.2881 0.229 -5.613 0.000 -1.738 -0.838
categories_Stats -0.6329 0.166 -3.812 0.000 -0.958 -0.307
categories_Commentary available -0.4946 0.540 -0.917 0.359 -1.552 0.563
categories_Includes level editor -0.0701 0.240 -0.292 0.770 -0.540 0.400
categories_Steam Workshop 2.1671 0.262 8.259 0.000 1.653 2.681
categories_In-App Purchases -1.2810 0.293 -4.378 0.000 -1.855 -0.708
categories_Co-op 1.1491 0.244 4.705 0.000 0.670 1.628
categories_Full controller support 1.9063 0.116 16.451 0.000 1.679 2.133
categories_Steam Leaderboards -0.5609 0.132 -4.238 0.000 -0.820 -0.302
categories_SteamVR Collectibles 8.6050 1.049 8.205 0.000 6.549 10.660
categories_Online Co-op 0.0803 0.308 0.261 0.794 -0.523 0.684
categories_Shared/Split Screen -0.1753 0.194 -0.904 0.366 -0.555 0.205
categories_Local Co-op -0.5647 0.314 -1.796 0.073 -1.181 0.052
categories_MMO 0.3569 0.454 0.785 0.432 -0.534 1.248
categories_VR Support 1.4219 0.441 3.225 0.001 0.558 2.286
categories_Mods -1.3181 6.512 -0.202 0.840 -14.082 11.446
categories_Mods (require HL2) -0.6887 9.023 -0.076 0.939 -18.374 16.997
categories_Steam Turn Notifications 1.1034 0.816 1.353 0.176 -0.495 2.702
achievements 2.375e-06 0.000 0.021 0.983 -0.000 0.000
%_ratings 1.4823 0.174 8.504 0.000 1.141 1.824
QTD_ratings 1.007e-05 2.57e-06 3.913 0.000 5.03e-06 1.51e-05
owners_mean -9.834e-08 4.51e-08 -2.180 0.029 -1.87e-07 -9.92e-09
average_playtime_>0 1.0625 0.110 9.634 0.000 0.846 1.279
Omnibus: 51073.102 Durbin-Watson: 1.908
Prob(Omnibus): 0.000 Jarque-Bera (JB): 575051620.740
Skew: 13.954 Prob(JB): 0.00
Kurtosis: 719.457 Cond. No. 1.51e+09


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.51e+09. This might indicate that there are
strong multicollinearity or other numerical problems.

Rodada a primeira regressão, vemos que os números não foram muito animadores. As variáveis abaixo, cujo p-valor (P>|t|) ficou acima de 0,05, serão excluídas do modelo, e veremos se temos alguma melhora.

In [44]:
# Regressors to take out of the `price` model before refitting. Every entry has
# P>|t| above 0.05 in the summary of the first fit (Out[43]).
Retirar = [
    # language / platform columns
    'english',
    'platforms_windows',
    'platforms_mac',
    # 'genres_*' columns
    'genres_Racing',
    'genres_Violent',
    'genres_Gore',
    'genres_Sexual Content',
    # 'categories_*' columns
    'categories_Local Multi-Player',
    'categories_Captions available',
    'categories_Commentary available',
    'categories_Includes level editor',
    'categories_Online Co-op',
    'categories_Shared/Split Screen',
    'categories_Local Co-op',
    'categories_MMO',
    'categories_Mods',
    'categories_Mods (require HL2)',
    'categories_Steam Turn Notifications',
    # numeric column
    'achievements',
]
In [45]:
# Drop every column named in Retirar from the working list Colunas_all.
#
# A name that is not a candidate column at all (e.g. a typo) is rejected up front,
# so mistakes in Retirar still fail loudly with ValueError as list.remove did.
desconhecidas = [coluna for coluna in Retirar if coluna not in Colunas_Tratada]
if desconhecidas:
    raise ValueError(f'Colunas desconhecidas em Retirar: {desconhecidas}')
# Filtering (instead of calling list.remove per name) keeps the cell idempotent:
# running it a second time no longer raises because the names are already gone.
Colunas_all = [coluna for coluna in Colunas_all if coluna not in Retirar]
In [46]:
# Number of regressors left after the removals.
len(Colunas_all)
Out[46]:
36
In [47]:
# Rebuild the regressor matrix from the reduced column list, again with the constant column added.
X_all = sm.add_constant(df_all[Colunas_all])
In [48]:
# Refit OLS on the reduced regressor matrix; y_all is still the value assigned in an earlier cell.
regressão_all = sm.OLS(y_all, X_all).fit()
In [49]:
# Display the summary of the refitted model for comparison with the first fit.
regressão_all.summary()
Out[49]:
OLS Regression Results
Dep. Variable: price R-squared: 0.227
Model: OLS Adj. R-squared: 0.226
Method: Least Squares F-statistic: 218.6
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:55 Log-Likelihood: -87798.
No. Observations: 26846 AIC: 1.757e+05
Df Residuals: 26809 BIC: 1.760e+05
Df Model: 36
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const -389.9933 43.131 -9.042 0.000 -474.533 -305.454
Year 0.1957 0.021 9.148 0.000 0.154 0.238
platforms_linux -0.2884 0.106 -2.732 0.006 -0.495 -0.081
genres_Action -0.3976 0.087 -4.558 0.000 -0.569 -0.227
genres_Free to Play -6.1063 0.183 -33.422 0.000 -6.464 -5.748
genres_Strategy 0.9723 0.105 9.269 0.000 0.767 1.178
genres_Adventure 0.4258 0.086 4.975 0.000 0.258 0.594
genres_Indie -3.7323 0.095 -39.449 0.000 -3.918 -3.547
genres_RPG 0.6105 0.111 5.492 0.000 0.393 0.828
genres_Casual -2.1394 0.085 -25.234 0.000 -2.306 -1.973
genres_Simulation 1.5934 0.105 15.177 0.000 1.388 1.799
genres_Massively Multiplayer -0.8851 0.285 -3.104 0.002 -1.444 -0.326
genres_Nudity 1.7458 0.394 4.430 0.000 0.973 2.518
genres_Sports 1.1259 0.187 6.009 0.000 0.759 1.493
genres_Early Access 1.3809 0.133 10.347 0.000 1.119 1.642
categories_Multi-player 1.3906 0.145 9.564 0.000 1.106 1.676
categories_Online Multi-Player 1.8707 0.178 10.535 0.000 1.523 2.219
categories_Valve Anti-Cheat enabled 3.1420 0.700 4.488 0.000 1.770 4.514
categories_Single-player 1.3157 0.222 5.936 0.000 0.881 1.750
categories_Steam Cloud 1.7538 0.101 17.298 0.000 1.555 1.953
categories_Steam Achievements 0.6959 0.094 7.392 0.000 0.511 0.880
categories_Steam Trading Cards 0.5861 0.102 5.755 0.000 0.386 0.786
categories_Partial Controller Support 0.9309 0.115 8.085 0.000 0.705 1.157
categories_Includes Source SDK -3.0793 1.136 -2.712 0.007 -5.305 -0.854
categories_Cross-Platform Multiplayer -1.1902 0.226 -5.265 0.000 -1.633 -0.747
categories_Stats -0.6367 0.165 -3.848 0.000 -0.961 -0.312
categories_Steam Workshop 2.1350 0.231 9.224 0.000 1.681 2.589
categories_In-App Purchases -1.2358 0.291 -4.251 0.000 -1.806 -0.666
categories_Co-op 0.8958 0.151 5.935 0.000 0.600 1.192
categories_Full controller support 1.8488 0.113 16.425 0.000 1.628 2.069
categories_Steam Leaderboards -0.5677 0.132 -4.304 0.000 -0.826 -0.309
categories_SteamVR Collectibles 8.6575 1.048 8.261 0.000 6.603 10.712
categories_VR Support 1.4336 0.440 3.255 0.001 0.570 2.297
%_ratings 1.4765 0.173 8.513 0.000 1.137 1.816
QTD_ratings 1.005e-05 2.57e-06 3.908 0.000 5.01e-06 1.51e-05
owners_mean -9.592e-08 4.5e-08 -2.131 0.033 -1.84e-07 -7.69e-09
average_playtime_>0 1.0790 0.110 9.802 0.000 0.863 1.295
Omnibus: 51057.600 Durbin-Watson: 1.908
Prob(Omnibus): 0.000 Jarque-Bera (JB): 573664514.219
Skew: 13.945 Prob(JB): 0.00
Kurtosis: 718.592 Cond. No. 1.49e+09


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.49e+09. This might indicate that there are
strong multicollinearity or other numerical problems.

O baixo valor de R² indica que, de fato, essas variáveis são pouco eficientes para explicar o preço dos jogos. Vou tentar fazer esses mesmos cálculos para log_price e log_log_price.

In [50]:
# Reset the working column list to a fresh copy of Colunas_Tratada before the log_price models.
Colunas_all = Colunas_Tratada.copy()
In [51]:
# Number of candidate regressors after the reset.
len(Colunas_all)
Out[51]:
55
In [52]:
# OLS of log_price on every column currently in Colunas_all (plus the added constant).
y_all = df_all.loc[:, 'log_price']
X_all = sm.add_constant(data=df_all.loc[:, Colunas_all])
regressão_all = sm.OLS(endog=y_all, exog=X_all).fit()
# Last expression of the cell: renders the summary table.
regressão_all.summary()
Out[52]:
OLS Regression Results
Dep. Variable: log_price R-squared: 0.402
Model: OLS Adj. R-squared: 0.401
Method: Least Squares F-statistic: 327.5
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:55 Log-Likelihood: -27270.
No. Observations: 26846 AIC: 5.465e+04
Df Residuals: 26790 BIC: 5.511e+04
Df Model: 55
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 31.4903 4.593 6.856 0.000 22.487 40.493
Year -0.0151 0.002 -6.657 0.000 -0.020 -0.011
english 0.1190 0.031 3.898 0.000 0.059 0.179
platforms_windows 0.2571 0.300 0.858 0.391 -0.330 0.845
platforms_mac 0.0959 0.012 8.173 0.000 0.073 0.119
platforms_linux -0.0572 0.014 -4.211 0.000 -0.084 -0.031
genres_Action -0.0174 0.009 -1.872 0.061 -0.036 0.001
genres_Free to Play -1.5431 0.019 -80.062 0.000 -1.581 -1.505
genres_Strategy 0.1629 0.011 14.719 0.000 0.141 0.185
genres_Adventure 0.1056 0.009 11.668 0.000 0.088 0.123
genres_Indie -0.3806 0.010 -38.175 0.000 -0.400 -0.361
genres_RPG 0.0889 0.012 7.509 0.000 0.066 0.112
genres_Casual -0.2525 0.009 -28.148 0.000 -0.270 -0.235
genres_Simulation 0.1882 0.011 17.018 0.000 0.167 0.210
genres_Racing -0.0223 0.023 -0.989 0.322 -0.067 0.022
genres_Violent 0.0463 0.032 1.469 0.142 -0.015 0.108
genres_Massively Multiplayer -0.1053 0.038 -2.783 0.005 -0.179 -0.031
genres_Nudity 0.1440 0.052 2.765 0.006 0.042 0.246
genres_Sports 0.1560 0.020 7.691 0.000 0.116 0.196
genres_Early Access 0.2744 0.014 19.570 0.000 0.247 0.302
genres_Gore -0.0612 0.039 -1.578 0.115 -0.137 0.015
genres_Sexual Content 0.1221 0.054 2.268 0.023 0.017 0.228
categories_Multi-player 0.1231 0.016 7.719 0.000 0.092 0.154
categories_Online Multi-Player 0.1608 0.021 7.638 0.000 0.120 0.202
categories_Local Multi-Player 0.0399 0.024 1.672 0.095 -0.007 0.087
categories_Valve Anti-Cheat enabled 0.1034 0.074 1.394 0.163 -0.042 0.249
categories_Single-player 0.1271 0.024 5.405 0.000 0.081 0.173
categories_Steam Cloud 0.2369 0.011 22.175 0.000 0.216 0.258
categories_Steam Achievements 0.0697 0.010 6.954 0.000 0.050 0.089
categories_Steam Trading Cards 0.0983 0.011 9.182 0.000 0.077 0.119
categories_Captions available 0.0163 0.026 0.623 0.534 -0.035 0.068
categories_Partial Controller Support 0.1001 0.012 8.182 0.000 0.076 0.124
categories_Includes Source SDK -0.2945 0.123 -2.388 0.017 -0.536 -0.053
categories_Cross-Platform Multiplayer -0.1068 0.024 -4.435 0.000 -0.154 -0.060
categories_Stats -0.0536 0.017 -3.076 0.002 -0.088 -0.019
categories_Commentary available -0.0449 0.057 -0.792 0.428 -0.156 0.066
categories_Includes level editor -0.0112 0.025 -0.445 0.657 -0.061 0.038
categories_Steam Workshop 0.2219 0.028 8.058 0.000 0.168 0.276
categories_In-App Purchases -0.2507 0.031 -8.165 0.000 -0.311 -0.191
categories_Co-op 0.0995 0.026 3.883 0.000 0.049 0.150
categories_Full controller support 0.2230 0.012 18.337 0.000 0.199 0.247
categories_Steam Leaderboards -0.0313 0.014 -2.255 0.024 -0.059 -0.004
categories_SteamVR Collectibles 0.8759 0.110 7.959 0.000 0.660 1.092
categories_Online Co-op 0.0267 0.032 0.827 0.408 -0.037 0.090
categories_Shared/Split Screen 0.0032 0.020 0.157 0.875 -0.037 0.043
categories_Local Co-op 0.0018 0.033 0.056 0.956 -0.063 0.067
categories_MMO 0.0206 0.048 0.431 0.666 -0.073 0.114
categories_VR Support 0.1592 0.046 3.441 0.001 0.069 0.250
categories_Mods -0.2117 0.683 -0.310 0.757 -1.551 1.128
categories_Mods (require HL2) -0.0319 0.947 -0.034 0.973 -1.888 1.824
categories_Steam Turn Notifications 0.1267 0.086 1.480 0.139 -0.041 0.294
achievements -3.837e-06 1.17e-05 -0.327 0.744 -2.68e-05 1.92e-05
%_ratings 0.2417 0.018 13.211 0.000 0.206 0.278
QTD_ratings 1.013e-06 2.7e-07 3.750 0.000 4.83e-07 1.54e-06
owners_mean -1.419e-08 4.73e-09 -2.998 0.003 -2.35e-08 -4.92e-09
average_playtime_>0 -0.0027 0.012 -0.229 0.819 -0.025 0.020
Omnibus: 377.283 Durbin-Watson: 1.867
Prob(Omnibus): 0.000 Jarque-Bera (JB): 532.838
Skew: -0.177 Prob(JB): 1.98e-116
Kurtosis: 3.592 Cond. No. 1.51e+09


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.51e+09. This might indicate that there are
strong multicollinearity or other numerical problems.
In [53]:
# Regressors to take out of the `log_price` model before refitting. Every entry has
# P>|t| above 0.05 in the summary of the full log_price fit (Out[52]).
Retirar = [
    # platform column
    'platforms_windows',
    # 'genres_*' columns
    'genres_Action',
    'genres_Racing',
    'genres_Violent',
    'genres_Gore',
    # 'categories_*' columns
    'categories_Local Multi-Player',
    'categories_Valve Anti-Cheat enabled',
    'categories_Captions available',
    'categories_Commentary available',
    'categories_Includes level editor',
    'categories_Online Co-op',
    'categories_Shared/Split Screen',
    'categories_Local Co-op',
    'categories_MMO',
    'categories_Mods',
    'categories_Mods (require HL2)',
    'categories_Steam Turn Notifications',
    # numeric columns
    'achievements',
    'average_playtime_>0',
]
In [54]:
# Drop every column named in Retirar from the working list Colunas_all.
#
# A name that is not a candidate column at all (e.g. a typo) is rejected up front,
# so mistakes in Retirar still fail loudly with ValueError as list.remove did.
desconhecidas = [coluna for coluna in Retirar if coluna not in Colunas_Tratada]
if desconhecidas:
    raise ValueError(f'Colunas desconhecidas em Retirar: {desconhecidas}')
# Filtering (instead of calling list.remove per name) keeps the cell idempotent:
# running it a second time no longer raises because the names are already gone.
Colunas_all = [coluna for coluna in Colunas_all if coluna not in Retirar]
In [55]:
# Number of regressors left after the removals.
len(Colunas_all)
Out[55]:
36
In [56]:
# Refit the log_price model using only the columns that remain in Colunas_all.
y_all = df_all.loc[:, 'log_price']
X_all = sm.add_constant(data=df_all.loc[:, Colunas_all])
regressão_all = sm.OLS(endog=y_all, exog=X_all).fit()
# Last expression of the cell: renders the summary table.
regressão_all.summary()
Out[56]:
OLS Regression Results
Dep. Variable: log_price R-squared: 0.402
Model: OLS Adj. R-squared: 0.401
Method: Least Squares F-statistic: 499.9
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:55 Log-Likelihood: -27279.
No. Observations: 26846 AIC: 5.463e+04
Df Residuals: 26809 BIC: 5.494e+04
Df Model: 36
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 30.6930 4.333 7.084 0.000 22.200 39.186
Year -0.0146 0.002 -6.798 0.000 -0.019 -0.010
english 0.1165 0.030 3.823 0.000 0.057 0.176
platforms_mac 0.0986 0.012 8.446 0.000 0.076 0.121
platforms_linux -0.0561 0.014 -4.147 0.000 -0.083 -0.030
genres_Free to Play -1.5439 0.019 -81.109 0.000 -1.581 -1.507
genres_Strategy 0.1662 0.011 15.182 0.000 0.145 0.188
genres_Adventure 0.1050 0.009 11.680 0.000 0.087 0.123
genres_Indie -0.3822 0.010 -38.635 0.000 -0.402 -0.363
genres_RPG 0.0906 0.012 7.740 0.000 0.068 0.114
genres_Casual -0.2490 0.009 -28.280 0.000 -0.266 -0.232
genres_Simulation 0.1881 0.011 17.146 0.000 0.167 0.210
genres_Massively Multiplayer -0.1029 0.030 -3.440 0.001 -0.162 -0.044
genres_Nudity 0.1436 0.052 2.784 0.005 0.042 0.245
genres_Sports 0.1556 0.020 7.916 0.000 0.117 0.194
genres_Early Access 0.2718 0.014 19.463 0.000 0.244 0.299
genres_Sexual Content 0.1276 0.054 2.374 0.018 0.022 0.233
categories_Multi-player 0.1280 0.015 8.403 0.000 0.098 0.158
categories_Online Multi-Player 0.1699 0.019 9.119 0.000 0.133 0.206
categories_Single-player 0.1253 0.023 5.398 0.000 0.080 0.171
categories_Steam Cloud 0.2377 0.011 22.337 0.000 0.217 0.259
categories_Steam Achievements 0.0696 0.010 7.024 0.000 0.050 0.089
categories_Steam Trading Cards 0.0974 0.010 9.570 0.000 0.077 0.117
categories_Partial Controller Support 0.0968 0.012 8.106 0.000 0.073 0.120
categories_Includes Source SDK -0.2931 0.118 -2.480 0.013 -0.525 -0.061
categories_Cross-Platform Multiplayer -0.1066 0.024 -4.467 0.000 -0.153 -0.060
categories_Stats -0.0533 0.017 -3.073 0.002 -0.087 -0.019
categories_Steam Workshop 0.2171 0.024 8.971 0.000 0.170 0.265
categories_In-App Purchases -0.2510 0.031 -8.226 0.000 -0.311 -0.191
categories_Co-op 0.1151 0.016 7.294 0.000 0.084 0.146
categories_Full controller support 0.2216 0.012 19.116 0.000 0.199 0.244
categories_Steam Leaderboards -0.0353 0.014 -2.570 0.010 -0.062 -0.008
categories_SteamVR Collectibles 0.8714 0.110 7.925 0.000 0.656 1.087
categories_VR Support 0.1592 0.046 3.444 0.001 0.069 0.250
%_ratings 0.2451 0.018 13.476 0.000 0.209 0.281
QTD_ratings 1.028e-06 2.7e-07 3.809 0.000 4.99e-07 1.56e-06
owners_mean -1.382e-08 4.67e-09 -2.957 0.003 -2.3e-08 -4.66e-09
Omnibus: 373.288 Durbin-Watson: 1.867
Prob(Omnibus): 0.000 Jarque-Bera (JB): 523.825
Skew: -0.178 Prob(JB): 1.79e-114
Kurtosis: 3.585 Cond. No. 1.42e+09


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.42e+09. This might indicate that there are
strong multicollinearity or other numerical problems.

Apesar do valor de R² ter subido, ele ainda indica que as variáveis são pouco explicativas para o modelo. Vamos fazer uma última tentativa usando log_log_price.

In [57]:
# Reset the working column list to a fresh copy of Colunas_Tratada before the log_log_price models,
# then show how many candidate regressors it holds.
Colunas_all = Colunas_Tratada.copy()
len(Colunas_all)
Out[57]:
55
In [58]:
# OLS of log_log_price on every column currently in Colunas_all (plus the added constant).
y_all = df_all.loc[:, 'log_log_price']
X_all = sm.add_constant(data=df_all.loc[:, Colunas_all])
regressão_all = sm.OLS(endog=y_all, exog=X_all).fit()
# Last expression of the cell: renders the summary table.
regressão_all.summary()
Out[58]:
OLS Regression Results
Dep. Variable: log_log_price R-squared: 0.466
Model: OLS Adj. R-squared: 0.465
Method: Least Squares F-statistic: 425.9
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:55 Log-Likelihood: -4238.4
No. Observations: 26846 AIC: 8589.
Df Residuals: 26790 BIC: 9048.
Df Model: 55
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 21.5461 1.948 11.062 0.000 17.728 25.364
Year -0.0104 0.001 -10.789 0.000 -0.012 -0.009
english 0.0641 0.013 4.951 0.000 0.039 0.089
platforms_windows 0.1471 0.127 1.157 0.247 -0.102 0.396
platforms_mac 0.0462 0.005 9.282 0.000 0.036 0.056
platforms_linux -0.0247 0.006 -4.291 0.000 -0.036 -0.013
genres_Action 7.322e-05 0.004 0.019 0.985 -0.008 0.008
genres_Free to Play -0.8592 0.008 -105.134 0.000 -0.875 -0.843
genres_Strategy 0.0695 0.005 14.803 0.000 0.060 0.079
genres_Adventure 0.0467 0.004 12.172 0.000 0.039 0.054
genres_Indie -0.1332 0.004 -31.516 0.000 -0.142 -0.125
genres_RPG 0.0358 0.005 7.137 0.000 0.026 0.046
genres_Casual -0.0906 0.004 -23.808 0.000 -0.098 -0.083
genres_Simulation 0.0697 0.005 14.860 0.000 0.060 0.079
genres_Racing -0.0006 0.010 -0.067 0.947 -0.019 0.018
genres_Violent 0.0210 0.013 1.574 0.115 -0.005 0.047
genres_Massively Multiplayer -0.0271 0.016 -1.687 0.092 -0.059 0.004
genres_Nudity 0.0552 0.022 2.499 0.012 0.012 0.098
genres_Sports 0.0595 0.009 6.910 0.000 0.043 0.076
genres_Early Access 0.1111 0.006 18.685 0.000 0.099 0.123
genres_Gore -0.0263 0.016 -1.600 0.110 -0.059 0.006
genres_Sexual Content 0.0488 0.023 2.137 0.033 0.004 0.094
categories_Multi-player 0.0393 0.007 5.817 0.000 0.026 0.053
categories_Online Multi-Player 0.0487 0.009 5.456 0.000 0.031 0.066
categories_Local Multi-Player 0.0199 0.010 1.961 0.050 6.55e-06 0.040
categories_Valve Anti-Cheat enabled 0.0124 0.031 0.394 0.693 -0.049 0.074
categories_Single-player 0.0549 0.010 5.510 0.000 0.035 0.074
categories_Steam Cloud 0.0936 0.005 20.661 0.000 0.085 0.102
categories_Steam Achievements 0.0273 0.004 6.410 0.000 0.019 0.036
categories_Steam Trading Cards 0.0444 0.005 9.772 0.000 0.035 0.053
categories_Captions available 0.0092 0.011 0.833 0.405 -0.012 0.031
categories_Partial Controller Support 0.0370 0.005 7.136 0.000 0.027 0.047
categories_Includes Source SDK -0.1004 0.052 -1.920 0.055 -0.203 0.002
categories_Cross-Platform Multiplayer -0.0372 0.010 -3.647 0.000 -0.057 -0.017
categories_Stats -0.0190 0.007 -2.572 0.010 -0.033 -0.005
categories_Commentary available -0.0148 0.024 -0.617 0.537 -0.062 0.032
categories_Includes level editor -0.0046 0.011 -0.432 0.665 -0.026 0.016
categories_Steam Workshop 0.0743 0.012 6.364 0.000 0.051 0.097
categories_In-App Purchases -0.1205 0.013 -9.253 0.000 -0.146 -0.095
categories_Co-op 0.0319 0.011 2.933 0.003 0.011 0.053
categories_Full controller support 0.0823 0.005 15.966 0.000 0.072 0.092
categories_Steam Leaderboards -0.0067 0.006 -1.141 0.254 -0.018 0.005
categories_SteamVR Collectibles 0.3040 0.047 6.514 0.000 0.213 0.395
categories_Online Co-op 0.0142 0.014 1.034 0.301 -0.013 0.041
categories_Shared/Split Screen 0.0045 0.009 0.520 0.603 -0.012 0.021
categories_Local Co-op 0.0073 0.014 0.519 0.603 -0.020 0.035
categories_MMO -0.0006 0.020 -0.029 0.977 -0.040 0.039
categories_VR Support 0.0518 0.020 2.637 0.008 0.013 0.090
categories_Mods -0.0800 0.290 -0.276 0.782 -0.648 0.488
categories_Mods (require HL2) -0.0138 0.402 -0.034 0.973 -0.801 0.773
categories_Steam Turn Notifications 0.0491 0.036 1.354 0.176 -0.022 0.120
achievements -3.65e-06 4.98e-06 -0.734 0.463 -1.34e-05 6.1e-06
%_ratings 0.0888 0.008 11.451 0.000 0.074 0.104
QTD_ratings 3.919e-07 1.15e-07 3.421 0.001 1.67e-07 6.16e-07
owners_mean -6.079e-09 2.01e-09 -3.028 0.002 -1e-08 -2.14e-09
average_playtime_>0 -0.0202 0.005 -4.108 0.000 -0.030 -0.011
Omnibus: 4473.999 Durbin-Watson: 1.884
Prob(Omnibus): 0.000 Jarque-Bera (JB): 9342.587
Skew: -0.998 Prob(JB): 0.00
Kurtosis: 5.090 Cond. No. 1.51e+09


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.51e+09. This might indicate that there are
strong multicollinearity or other numerical problems.
In [59]:
# Regressors to take out of the `log_log_price` model before refitting. Every entry shows
# a P>|t| of 0.050 or higher in the summary of the full log_log_price fit (Out[58]).
Retirar = [
    # platform column
    'platforms_windows',
    # 'genres_*' columns
    'genres_Action',
    'genres_Racing',
    'genres_Violent',
    'genres_Massively Multiplayer',
    'genres_Gore',
    # 'categories_*' columns
    'categories_Local Multi-Player',
    'categories_Valve Anti-Cheat enabled',
    'categories_Captions available',
    'categories_Commentary available',
    'categories_Includes level editor',
    'categories_Includes Source SDK',
    'categories_Steam Leaderboards',
    'categories_Online Co-op',
    'categories_Shared/Split Screen',
    'categories_Local Co-op',
    'categories_MMO',
    'categories_Mods',
    'categories_Mods (require HL2)',
    'categories_Steam Turn Notifications',
    # numeric column
    'achievements',
]
In [60]:
# Drop every column named in Retirar from the working list Colunas_all.
#
# A name that is not a candidate column at all (e.g. a typo) is rejected up front,
# so mistakes in Retirar still fail loudly with ValueError as list.remove did.
desconhecidas = [coluna for coluna in Retirar if coluna not in Colunas_Tratada]
if desconhecidas:
    raise ValueError(f'Colunas desconhecidas em Retirar: {desconhecidas}')
# Filtering (instead of calling list.remove per name) keeps the cell idempotent:
# running it a second time no longer raises because the names are already gone.
Colunas_all = [coluna for coluna in Colunas_all if coluna not in Retirar]
In [61]:
# Number of regressors left after the removals.
len(Colunas_all)
Out[61]:
34
In [62]:
# Refit the log_log_price model using only the columns that remain in Colunas_all.
y_all = df_all.loc[:, 'log_log_price']
X_all = sm.add_constant(data=df_all.loc[:, Colunas_all])
regressão_all = sm.OLS(endog=y_all, exog=X_all).fit()
# Last expression of the cell: renders the summary table.
regressão_all.summary()
Out[62]:
OLS Regression Results
Dep. Variable: log_log_price R-squared: 0.466
Model: OLS Adj. R-squared: 0.465
Method: Least Squares F-statistic: 687.9
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:56 Log-Likelihood: -4253.0
No. Observations: 26846 AIC: 8576.
Df Residuals: 26811 BIC: 8863.
Df Model: 34
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 20.9406 1.921 10.901 0.000 17.176 24.706
Year -0.0100 0.001 -10.526 0.000 -0.012 -0.008
english 0.0646 0.013 4.999 0.000 0.039 0.090
platforms_mac 0.0471 0.005 9.518 0.000 0.037 0.057
platforms_linux -0.0246 0.006 -4.286 0.000 -0.036 -0.013
genres_Free to Play -0.8632 0.008 -107.002 0.000 -0.879 -0.847
genres_Strategy 0.0699 0.005 15.061 0.000 0.061 0.079
genres_Adventure 0.0463 0.004 12.255 0.000 0.039 0.054
genres_Indie -0.1329 0.004 -31.688 0.000 -0.141 -0.125
genres_RPG 0.0349 0.005 7.068 0.000 0.025 0.045
genres_Casual -0.0902 0.004 -24.155 0.000 -0.098 -0.083
genres_Simulation 0.0686 0.005 14.753 0.000 0.059 0.078
genres_Nudity 0.0561 0.022 2.565 0.010 0.013 0.099
genres_Sports 0.0610 0.008 7.328 0.000 0.045 0.077
genres_Early Access 0.1106 0.006 18.681 0.000 0.099 0.122
genres_Sexual Content 0.0503 0.023 2.208 0.027 0.006 0.095
categories_Multi-player 0.0414 0.006 6.407 0.000 0.029 0.054
categories_Online Multi-Player 0.0518 0.008 6.610 0.000 0.036 0.067
categories_Single-player 0.0608 0.009 6.442 0.000 0.042 0.079
categories_Steam Cloud 0.0937 0.004 20.839 0.000 0.085 0.103
categories_Steam Achievements 0.0255 0.004 6.135 0.000 0.017 0.034
categories_Steam Trading Cards 0.0448 0.005 9.879 0.000 0.036 0.054
categories_Partial Controller Support 0.0381 0.005 7.527 0.000 0.028 0.048
categories_Cross-Platform Multiplayer -0.0389 0.010 -3.848 0.000 -0.059 -0.019
categories_Stats -0.0211 0.007 -2.965 0.003 -0.035 -0.007
categories_Steam Workshop 0.0709 0.010 6.908 0.000 0.051 0.091
categories_In-App Purchases -0.1255 0.013 -9.790 0.000 -0.151 -0.100
categories_Co-op 0.0432 0.007 6.460 0.000 0.030 0.056
categories_Full controller support 0.0851 0.005 17.417 0.000 0.076 0.095
categories_SteamVR Collectibles 0.3056 0.047 6.554 0.000 0.214 0.397
categories_VR Support 0.0496 0.020 2.529 0.011 0.011 0.088
%_ratings 0.0899 0.008 11.662 0.000 0.075 0.105
QTD_ratings 4.121e-07 1.14e-07 3.609 0.000 1.88e-07 6.36e-07
owners_mean -6.508e-09 1.98e-09 -3.295 0.001 -1.04e-08 -2.64e-09
average_playtime_>0 -0.0214 0.005 -4.367 0.000 -0.031 -0.012
Omnibus: 4469.682 Durbin-Watson: 1.884
Prob(Omnibus): 0.000 Jarque-Bera (JB): 9292.591
Skew: -0.999 Prob(JB): 0.00
Kurtosis: 5.077 Cond. No. 1.49e+09


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.49e+09. This might indicate that there are
strong multicollinearity or other numerical problems.

Os números não foram muito animadores. Como o maior valor de R² foi o encontrado na última regressão, vou utilizá-la de base para a análise (com a ressalva de que valores de R² obtidos com variáveis dependentes diferentes — price, log_price e log_log_price — não são diretamente comparáveis). Podemos perceber que a variável com o maior coeficiente em valor absoluto (desconsiderando a constante) foi 'genres_Free to Play', que se refere ao fato de o jogo ser ou não gratuito para jogar. Podemos assumir que jogos Free to Play possuem um modelo diferente de angariar lucro. Usualmente, são vendidos a preços baixos. Faz sentido o seu coeficiente (beta) ser o maior em módulo e ter sinal negativo.

In [63]:
# Show the fitted coefficients rounded to three decimals.
# Note: QTD_ratings and owners_mean print as 0.000 / -0.000 because their coefficients
# are of order 1e-7 and 1e-9 in the summary table; they are not exactly zero.
regressão_all.params.round(3)
Out[63]:
const                                    20.941
Year                                     -0.010
english                                   0.065
platforms_mac                             0.047
platforms_linux                          -0.025
genres_Free to Play                      -0.863
genres_Strategy                           0.070
genres_Adventure                          0.046
genres_Indie                             -0.133
genres_RPG                                0.035
genres_Casual                            -0.090
genres_Simulation                         0.069
genres_Nudity                             0.056
genres_Sports                             0.061
genres_Early Access                       0.111
genres_Sexual Content                     0.050
categories_Multi-player                   0.041
categories_Online Multi-Player            0.052
categories_Single-player                  0.061
categories_Steam Cloud                    0.094
categories_Steam Achievements             0.025
categories_Steam Trading Cards            0.045
categories_Partial Controller Support     0.038
categories_Cross-Platform Multiplayer    -0.039
categories_Stats                         -0.021
categories_Steam Workshop                 0.071
categories_In-App Purchases              -0.126
categories_Co-op                          0.043
categories_Full controller support        0.085
categories_SteamVR Collectibles           0.306
categories_VR Support                     0.050
%_ratings                                 0.090
QTD_ratings                               0.000
owners_mean                              -0.000
average_playtime_>0                      -0.021
dtype: float64

Vamos analisar agora o grupo com média de owners igual a 10 mil.

In [64]:
# Candidate regressors for the df_min models.
# Unlike the regressors shown in the df_all summaries, this list has no 'owners_mean' entry.
Colunas_Tratada_min = [
    # year / language / platform columns
    'Year', 'english',
    'platforms_windows', 'platforms_mac', 'platforms_linux',
    # 'genres_*' columns
    'genres_Action', 'genres_Free to Play', 'genres_Strategy', 'genres_Adventure',
    'genres_Indie', 'genres_RPG', 'genres_Casual', 'genres_Simulation',
    'genres_Racing', 'genres_Violent', 'genres_Massively Multiplayer', 'genres_Nudity',
    'genres_Sports', 'genres_Early Access', 'genres_Gore', 'genres_Sexual Content',
    # 'categories_*' columns
    'categories_Multi-player', 'categories_Online Multi-Player',
    'categories_Local Multi-Player', 'categories_Valve Anti-Cheat enabled',
    'categories_Single-player', 'categories_Steam Cloud',
    'categories_Steam Achievements', 'categories_Steam Trading Cards',
    'categories_Captions available', 'categories_Partial Controller Support',
    'categories_Includes Source SDK', 'categories_Cross-Platform Multiplayer',
    'categories_Stats', 'categories_Commentary available',
    'categories_Includes level editor', 'categories_Steam Workshop',
    'categories_In-App Purchases', 'categories_Co-op',
    'categories_Full controller support', 'categories_Steam Leaderboards',
    'categories_SteamVR Collectibles', 'categories_Online Co-op',
    'categories_Shared/Split Screen', 'categories_Local Co-op',
    'categories_MMO', 'categories_VR Support',
    'categories_Mods', 'categories_Mods (require HL2)',
    'categories_Steam Turn Notifications',
    # numeric columns
    'achievements', '%_ratings', 'QTD_ratings', 'average_playtime_>0',
]
In [65]:
# Working column list for the df_min models: a separate list holding the same names
# as Colunas_Tratada_min, so edits to it leave Colunas_Tratada_min untouched.
Colunas_min = list(Colunas_Tratada_min)
In [66]:
# Number of candidate regressors for the df_min models.
len(Colunas_min)
Out[66]:
54
In [67]:
# OLS of the untransformed price on every column in Colunas_min, restricted to df_min.
# NOTE(review): the summary below reports Df Model 51 for 54 regressors, near-zero
# coefficients (~1e-12 or smaller) for three 'categories_*' columns and a singular-matrix
# warning; presumably those columns do not vary within df_min. Confirm before interpreting.
y_min = df_min.loc[:, 'price']
X_min = sm.add_constant(data=df_min.loc[:, Colunas_min])
regressão_min = sm.OLS(endog=y_min, exog=X_min).fit()
# Last expression of the cell: renders the summary table.
regressão_min.summary()
Out[67]:
OLS Regression Results
Dep. Variable: price R-squared: 0.166
Model: OLS Adj. R-squared: 0.164
Method: Least Squares F-statistic: 71.64
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:56 Log-Likelihood: -59711.
No. Observations: 18433 AIC: 1.195e+05
Df Residuals: 18381 BIC: 1.199e+05
Df Model: 51
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const -311.8648 72.437 -4.305 0.000 -453.848 -169.881
Year 0.1557 0.036 4.339 0.000 0.085 0.226
english 0.4209 0.310 1.357 0.175 -0.187 1.029
platforms_windows 1.9260 3.095 0.622 0.534 -4.141 7.993
platforms_mac 0.1136 0.132 0.862 0.389 -0.145 0.372
platforms_linux -0.5594 0.158 -3.551 0.000 -0.868 -0.251
genres_Action -0.2527 0.103 -2.458 0.014 -0.454 -0.051
genres_Free to Play -5.0359 0.289 -17.441 0.000 -5.602 -4.470
genres_Strategy 0.9118 0.125 7.296 0.000 0.667 1.157
genres_Adventure 0.4790 0.101 4.767 0.000 0.282 0.676
genres_Indie -2.9808 0.114 -26.218 0.000 -3.204 -2.758
genres_RPG 0.1408 0.138 1.022 0.307 -0.129 0.411
genres_Casual -1.6170 0.097 -16.642 0.000 -1.807 -1.427
genres_Simulation 1.1744 0.122 9.625 0.000 0.935 1.414
genres_Racing -0.2796 0.250 -1.117 0.264 -0.770 0.211
genres_Violent -0.0032 0.323 -0.010 0.992 -0.637 0.631
genres_Massively Multiplayer -1.2348 0.458 -2.696 0.007 -2.133 -0.337
genres_Nudity 0.7567 0.571 1.325 0.185 -0.362 1.876
genres_Sports 1.2458 0.214 5.826 0.000 0.827 1.665
genres_Early Access 1.3856 0.143 9.700 0.000 1.106 1.666
genres_Gore 0.0050 0.402 0.012 0.990 -0.783 0.793
genres_Sexual Content 0.6300 0.585 1.078 0.281 -0.516 1.776
categories_Multi-player 1.4576 0.208 6.998 0.000 1.049 1.866
categories_Online Multi-Player 1.1737 0.251 4.669 0.000 0.681 1.666
categories_Local Multi-Player 0.2564 0.263 0.974 0.330 -0.260 0.772
categories_Valve Anti-Cheat enabled -4.454e-12 4.4e-12 -1.011 0.312 -1.31e-11 4.18e-12
categories_Single-player 1.3736 0.281 4.880 0.000 0.822 1.925
categories_Steam Cloud 1.2682 0.124 10.195 0.000 1.024 1.512
categories_Steam Achievements 0.1300 0.110 1.182 0.237 -0.086 0.346
categories_Steam Trading Cards 0.7725 0.137 5.658 0.000 0.505 1.040
categories_Captions available -0.3222 0.314 -1.025 0.305 -0.938 0.294
categories_Partial Controller Support 0.5106 0.139 3.665 0.000 0.238 0.784
categories_Includes Source SDK -2.5364 2.350 -1.079 0.281 -7.143 2.070
categories_Cross-Platform Multiplayer -0.9633 0.316 -3.046 0.002 -1.583 -0.343
categories_Stats -0.1566 0.203 -0.771 0.441 -0.555 0.241
categories_Commentary available -1.7112 0.786 -2.178 0.029 -3.251 -0.171
categories_Includes level editor 0.5018 0.313 1.601 0.109 -0.112 1.116
categories_Steam Workshop 0.5315 0.377 1.409 0.159 -0.208 1.271
categories_In-App Purchases -1.4337 0.496 -2.892 0.004 -2.406 -0.462
categories_Co-op 0.2049 0.379 0.541 0.588 -0.537 0.947
categories_Full controller support 1.0400 0.137 7.585 0.000 0.771 1.309
categories_Steam Leaderboards -0.4202 0.161 -2.605 0.009 -0.736 -0.104
categories_SteamVR Collectibles 5.2839 1.795 2.944 0.003 1.766 8.802
categories_Online Co-op 0.2065 0.426 0.485 0.628 -0.629 1.042
categories_Shared/Split Screen -0.0623 0.245 -0.254 0.799 -0.543 0.418
categories_Local Co-op 0.4431 0.420 1.056 0.291 -0.380 1.266
categories_MMO 1.4430 0.695 2.076 0.038 0.080 2.806
categories_VR Support 1.2317 0.577 2.136 0.033 0.102 2.362
categories_Mods 3.73e-15 3.54e-15 1.053 0.292 -3.21e-15 1.07e-14
categories_Mods (require HL2) -1.527e-15 1.2e-15 -1.276 0.202 -3.87e-15 8.19e-16
categories_Steam Turn Notifications 0.4714 1.021 0.462 0.644 -1.530 2.473
achievements 7.605e-05 0.000 0.624 0.533 -0.000 0.000
%_ratings 0.6842 0.188 3.645 0.000 0.316 1.052
QTD_ratings 0.0168 0.001 24.143 0.000 0.015 0.018
average_playtime_>0 -0.7861 0.237 -3.322 0.001 -1.250 -0.322
Omnibus: 42026.115 Durbin-Watson: 1.973
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1094610824.774
Skew: 21.328 Prob(JB): 0.00
Kurtosis: 1196.054 Cond. No. 1.00e+16


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 7.5e-22. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
In [68]:
# Same df_min model as the previous cell, now with log_price as the response.
y_min = df_min.loc[:, 'log_price']
X_min = sm.add_constant(data=df_min.loc[:, Colunas_min])
regressão_min = sm.OLS(endog=y_min, exog=X_min).fit()
# Last expression of the cell: renders the summary table.
regressão_min.summary()
Out[68]:
OLS Regression Results
Dep. Variable: log_price R-squared: 0.298
Model: OLS Adj. R-squared: 0.296
Method: Least Squares F-statistic: 153.0
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:56 Log-Likelihood: -18279.
No. Observations: 18433 AIC: 3.666e+04
Df Residuals: 18381 BIC: 3.707e+04
Df Model: 51
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 29.1218 7.652 3.806 0.000 14.123 44.121
Year -0.0141 0.004 -3.714 0.000 -0.022 -0.007
english 0.1477 0.033 4.505 0.000 0.083 0.212
platforms_windows 0.4629 0.327 1.416 0.157 -0.178 1.104
platforms_mac 0.0914 0.014 6.560 0.000 0.064 0.119
platforms_linux -0.0815 0.017 -4.899 0.000 -0.114 -0.049
genres_Action -0.0033 0.011 -0.304 0.761 -0.025 0.018
genres_Free to Play -1.4173 0.031 -46.464 0.000 -1.477 -1.358
genres_Strategy 0.1658 0.013 12.555 0.000 0.140 0.192
genres_Adventure 0.1127 0.011 10.614 0.000 0.092 0.134
genres_Indie -0.3475 0.012 -28.937 0.000 -0.371 -0.324
genres_RPG 0.0462 0.015 3.174 0.002 0.018 0.075
genres_Casual -0.2047 0.010 -19.942 0.000 -0.225 -0.185
genres_Simulation 0.1361 0.013 10.561 0.000 0.111 0.161
genres_Racing -0.0151 0.026 -0.570 0.569 -0.067 0.037
genres_Violent 0.0577 0.034 1.688 0.091 -0.009 0.125
genres_Massively Multiplayer -0.1543 0.048 -3.188 0.001 -0.249 -0.059
genres_Nudity 0.1155 0.060 1.914 0.056 -0.003 0.234
genres_Sports 0.1677 0.023 7.426 0.000 0.123 0.212
genres_Early Access 0.2748 0.015 18.209 0.000 0.245 0.304
genres_Gore -0.0662 0.042 -1.559 0.119 -0.149 0.017
genres_Sexual Content 0.0845 0.062 1.368 0.171 -0.037 0.206
categories_Multi-player 0.1465 0.022 6.657 0.000 0.103 0.190
categories_Online Multi-Player 0.1020 0.027 3.841 0.000 0.050 0.154
categories_Local Multi-Player 0.0564 0.028 2.027 0.043 0.002 0.111
categories_Valve Anti-Cheat enabled -4.801e-13 4.65e-13 -1.032 0.302 -1.39e-12 4.32e-13
categories_Single-player 0.1609 0.030 5.409 0.000 0.103 0.219
categories_Steam Cloud 0.1990 0.013 15.147 0.000 0.173 0.225
categories_Steam Achievements 0.0084 0.012 0.726 0.468 -0.014 0.031
categories_Steam Trading Cards 0.1523 0.014 10.558 0.000 0.124 0.181
categories_Captions available 0.0020 0.033 0.061 0.951 -0.063 0.067
categories_Partial Controller Support 0.0572 0.015 3.886 0.000 0.028 0.086
categories_Includes Source SDK -0.2645 0.248 -1.065 0.287 -0.751 0.222
categories_Cross-Platform Multiplayer -0.0552 0.033 -1.652 0.099 -0.121 0.010
categories_Stats -0.0178 0.021 -0.829 0.407 -0.060 0.024
categories_Commentary available -0.2417 0.083 -2.912 0.004 -0.404 -0.079
categories_Includes level editor 0.0288 0.033 0.870 0.384 -0.036 0.094
categories_Steam Workshop 0.0796 0.040 1.999 0.046 0.002 0.158
categories_In-App Purchases -0.3219 0.052 -6.145 0.000 -0.425 -0.219
categories_Co-op 0.0313 0.040 0.783 0.434 -0.047 0.110
categories_Full controller support 0.1514 0.014 10.456 0.000 0.123 0.180
categories_Steam Leaderboards -0.0218 0.017 -1.279 0.201 -0.055 0.012
categories_SteamVR Collectibles 0.5682 0.190 2.997 0.003 0.197 0.940
categories_Online Co-op 0.0334 0.045 0.741 0.459 -0.055 0.122
categories_Shared/Split Screen 0.0033 0.026 0.129 0.898 -0.047 0.054
categories_Local Co-op 0.0813 0.044 1.833 0.067 -0.006 0.168
categories_MMO 0.1926 0.073 2.623 0.009 0.049 0.337
categories_VR Support 0.1774 0.061 2.913 0.004 0.058 0.297
categories_Mods 4.307e-16 3.74e-16 1.151 0.250 -3.02e-16 1.16e-15
categories_Mods (require HL2) -1.857e-16 1.26e-16 -1.468 0.142 -4.33e-16 6.22e-17
categories_Steam Turn Notifications 0.0127 0.108 0.118 0.906 -0.199 0.224
achievements 1.837e-05 1.29e-05 1.426 0.154 -6.88e-06 4.36e-05
%_ratings 0.1499 0.020 7.558 0.000 0.111 0.189
QTD_ratings 0.0016 7.35e-05 21.691 0.000 0.001 0.002
average_playtime_>0 -0.1088 0.025 -4.352 0.000 -0.158 -0.060
Omnibus: 164.007 Durbin-Watson: 1.904
Prob(Omnibus): 0.000 Jarque-Bera (JB): 236.478
Skew: -0.103 Prob(JB): 4.46e-52
Kurtosis: 3.516 Cond. No. 1.00e+16


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 7.5e-22. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
In [69]:
# Same design matrix as before, but the response is now `log_log_price`.
# Note: this rebinds `regressão_min`, so later cells see this fit.
X_min = sm.add_constant(df_min.loc[:, Colunas_min])
y_min = df_min.loc[:, 'log_log_price']
regressão_min = sm.OLS(endog=y_min, exog=X_min).fit()
regressão_min.summary()
Out[69]:
OLS Regression Results
Dep. Variable: log_log_price R-squared: 0.325
Model: OLS Adj. R-squared: 0.323
Method: Least Squares F-statistic: 173.6
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:56 Log-Likelihood: -2768.2
No. Observations: 18433 AIC: 5640.
Df Residuals: 18381 BIC: 6047.
Df Model: 51
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 22.2608 3.299 6.748 0.000 15.795 28.727
Year -0.0108 0.002 -6.612 0.000 -0.014 -0.008
english 0.0731 0.014 5.171 0.000 0.045 0.101
platforms_windows 0.2413 0.141 1.712 0.087 -0.035 0.518
platforms_mac 0.0460 0.006 7.670 0.000 0.034 0.058
platforms_linux -0.0341 0.007 -4.759 0.000 -0.048 -0.020
genres_Action 0.0047 0.005 0.997 0.319 -0.005 0.014
genres_Free to Play -0.8084 0.013 -61.477 0.000 -0.834 -0.783
genres_Strategy 0.0729 0.006 12.804 0.000 0.062 0.084
genres_Adventure 0.0507 0.005 11.087 0.000 0.042 0.060
genres_Indie -0.1259 0.005 -24.312 0.000 -0.136 -0.116
genres_RPG 0.0220 0.006 3.510 0.000 0.010 0.034
genres_Casual -0.0754 0.004 -17.034 0.000 -0.084 -0.067
genres_Simulation 0.0513 0.006 9.229 0.000 0.040 0.062
genres_Racing 0.0048 0.011 0.417 0.677 -0.018 0.027
genres_Violent 0.0297 0.015 2.019 0.043 0.001 0.059
genres_Massively Multiplayer -0.0445 0.021 -2.135 0.033 -0.085 -0.004
genres_Nudity 0.0474 0.026 1.823 0.068 -0.004 0.098
genres_Sports 0.0651 0.010 6.683 0.000 0.046 0.084
genres_Early Access 0.1109 0.007 17.050 0.000 0.098 0.124
genres_Gore -0.0311 0.018 -1.699 0.089 -0.067 0.005
genres_Sexual Content 0.0333 0.027 1.249 0.212 -0.019 0.085
categories_Multi-player 0.0527 0.009 5.554 0.000 0.034 0.071
categories_Online Multi-Player 0.0248 0.011 2.169 0.030 0.002 0.047
categories_Local Multi-Player 0.0247 0.012 2.056 0.040 0.001 0.048
categories_Valve Anti-Cheat enabled -1.334e-13 2.01e-13 -0.665 0.506 -5.27e-13 2.6e-13
categories_Single-player 0.0707 0.013 5.512 0.000 0.046 0.096
categories_Steam Cloud 0.0810 0.006 14.307 0.000 0.070 0.092
categories_Steam Achievements 0.0058 0.005 1.161 0.246 -0.004 0.016
categories_Steam Trading Cards 0.0657 0.006 10.565 0.000 0.054 0.078
categories_Captions available 0.0104 0.014 0.724 0.469 -0.018 0.038
categories_Partial Controller Support 0.0225 0.006 3.552 0.000 0.010 0.035
categories_Includes Source SDK -0.0708 0.107 -0.661 0.508 -0.281 0.139
categories_Cross-Platform Multiplayer -0.0149 0.014 -1.032 0.302 -0.043 0.013
categories_Stats -0.0076 0.009 -0.816 0.414 -0.026 0.011
categories_Commentary available -0.0919 0.036 -2.569 0.010 -0.162 -0.022
categories_Includes level editor 0.0072 0.014 0.505 0.614 -0.021 0.035
categories_Steam Workshop 0.0249 0.017 1.450 0.147 -0.009 0.059
categories_In-App Purchases -0.1668 0.023 -7.388 0.000 -0.211 -0.123
categories_Co-op 0.0142 0.017 0.822 0.411 -0.020 0.048
categories_Full controller support 0.0601 0.006 9.625 0.000 0.048 0.072
categories_Steam Leaderboards -0.0045 0.007 -0.617 0.538 -0.019 0.010
categories_SteamVR Collectibles 0.1830 0.082 2.239 0.025 0.023 0.343
categories_Online Co-op 0.0132 0.019 0.680 0.496 -0.025 0.051
categories_Shared/Split Screen 0.0019 0.011 0.174 0.862 -0.020 0.024
categories_Local Co-op 0.0300 0.019 1.571 0.116 -0.007 0.068
categories_MMO 0.0659 0.032 2.081 0.037 0.004 0.128
categories_VR Support 0.0682 0.026 2.599 0.009 0.017 0.120
categories_Mods 1.334e-16 1.61e-16 0.827 0.408 -1.83e-16 4.49e-16
categories_Mods (require HL2) -5.973e-17 5.45e-17 -1.096 0.273 -1.67e-16 4.71e-17
categories_Steam Turn Notifications -0.0005 0.046 -0.010 0.992 -0.092 0.091
achievements 6.589e-06 5.55e-06 1.187 0.235 -4.29e-06 1.75e-05
%_ratings 0.0594 0.009 6.951 0.000 0.043 0.076
QTD_ratings 0.0005 3.17e-05 16.371 0.000 0.000 0.001
average_playtime_>0 -0.0433 0.011 -4.015 0.000 -0.064 -0.022
Omnibus: 2481.648 Durbin-Watson: 1.908
Prob(Omnibus): 0.000 Jarque-Bera (JB): 4471.626
Skew: -0.882 Prob(JB): 0.00
Kurtosis: 4.647 Cond. No. 1.00e+16


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 7.5e-22. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
In [70]:
# Coefficients of the current `regressão_min` fit, rounded to 3 decimals for a compact view.
regressão_min.params.round(decimals=3)
Out[70]:
const                                    22.261
Year                                     -0.011
english                                   0.073
platforms_windows                         0.241
platforms_mac                             0.046
platforms_linux                          -0.034
genres_Action                             0.005
genres_Free to Play                      -0.808
genres_Strategy                           0.073
genres_Adventure                          0.051
genres_Indie                             -0.126
genres_RPG                                0.022
genres_Casual                            -0.075
genres_Simulation                         0.051
genres_Racing                             0.005
genres_Violent                            0.030
genres_Massively Multiplayer             -0.045
genres_Nudity                             0.047
genres_Sports                             0.065
genres_Early Access                       0.111
genres_Gore                              -0.031
genres_Sexual Content                     0.033
categories_Multi-player                   0.053
categories_Online Multi-Player            0.025
categories_Local Multi-Player             0.025
categories_Valve Anti-Cheat enabled      -0.000
categories_Single-player                  0.071
categories_Steam Cloud                    0.081
categories_Steam Achievements             0.006
categories_Steam Trading Cards            0.066
categories_Captions available             0.010
categories_Partial Controller Support     0.023
categories_Includes Source SDK           -0.071
categories_Cross-Platform Multiplayer    -0.015
categories_Stats                         -0.008
categories_Commentary available          -0.092
categories_Includes level editor          0.007
categories_Steam Workshop                 0.025
categories_In-App Purchases              -0.167
categories_Co-op                          0.014
categories_Full controller support        0.060
categories_Steam Leaderboards            -0.005
categories_SteamVR Collectibles           0.183
categories_Online Co-op                   0.013
categories_Shared/Split Screen            0.002
categories_Local Co-op                    0.030
categories_MMO                            0.066
categories_VR Support                     0.068
categories_Mods                           0.000
categories_Mods (require HL2)            -0.000
categories_Steam Turn Notifications      -0.000
achievements                              0.000
%_ratings                                 0.059
QTD_ratings                               0.001
average_playtime_>0                      -0.043
dtype: float64

Podemos perceber que, no caso dos jogos com menos de 10 mil owners, os dados têm um potencial de explicação do preço ainda menor. Novamente, temos a variável "free to play" como principal contribuinte, provavelmente seguindo a mesma lógica que já foi elencada acima.

Por fim, vamos analisar o último grupo, o dos jogos com mais de 10 mil owners em média.

In [71]:
# Candidate predictors for the `df_max` regressions: every entry of `Colunas`
# except the identifiers (`appid`, `name`) and the three price columns.
Colunas_Tratada_max = [
    # release year / language flag
    'Year', 'english',
    # platform flags
    'platforms_windows', 'platforms_mac', 'platforms_linux',
    # genre flags
    'genres_Action', 'genres_Free to Play', 'genres_Strategy', 'genres_Adventure',
    'genres_Indie', 'genres_RPG', 'genres_Casual', 'genres_Simulation',
    'genres_Racing', 'genres_Violent', 'genres_Massively Multiplayer', 'genres_Nudity',
    'genres_Sports', 'genres_Early Access', 'genres_Gore', 'genres_Sexual Content',
    # Steam category flags
    'categories_Multi-player', 'categories_Online Multi-Player',
    'categories_Local Multi-Player', 'categories_Valve Anti-Cheat enabled',
    'categories_Single-player', 'categories_Steam Cloud',
    'categories_Steam Achievements', 'categories_Steam Trading Cards',
    'categories_Captions available', 'categories_Partial Controller Support',
    'categories_Includes Source SDK', 'categories_Cross-Platform Multiplayer',
    'categories_Stats', 'categories_Commentary available',
    'categories_Includes level editor', 'categories_Steam Workshop',
    'categories_In-App Purchases', 'categories_Co-op',
    'categories_Full controller support', 'categories_Steam Leaderboards',
    'categories_SteamVR Collectibles', 'categories_Online Co-op',
    'categories_Shared/Split Screen', 'categories_Local Co-op',
    'categories_MMO', 'categories_VR Support',
    'categories_Mods', 'categories_Mods (require HL2)',
    'categories_Steam Turn Notifications',
    # numeric features
    'achievements', '%_ratings', 'QTD_ratings', 'owners_mean', 'average_playtime_>0',
]
In [72]:
# Work on an independent shallow copy so the reference list
# `Colunas_Tratada_max` stays intact when `Colunas_max` is changed later.
Colunas_max = list(Colunas_Tratada_max)
len(Colunas_max)
Out[72]:
55
In [73]:
# OLS of the raw `price` column on every predictor in `Colunas_max`
# (intercept added by `add_constant`), fitted on the `df_max` sample.
X_max = sm.add_constant(df_max.loc[:, Colunas_max])
y_max = df_max.loc[:, 'price']
regressão_max = sm.OLS(endog=y_max, exog=X_max).fit()
regressão_max.summary()
Out[73]:
OLS Regression Results
Dep. Variable: price R-squared: 0.388
Model: OLS Adj. R-squared: 0.384
Method: Least Squares F-statistic: 96.33
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:56 Log-Likelihood: -27470.
No. Observations: 8413 AIC: 5.505e+04
Df Residuals: 8357 BIC: 5.545e+04
Df Model: 55
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const -935.5121 65.796 -14.218 0.000 -1064.489 -806.535
Year 0.4664 0.032 14.369 0.000 0.403 0.530
english 1.0929 0.719 1.519 0.129 -0.317 2.503
platforms_windows -0.4316 6.408 -0.067 0.946 -12.994 12.131
platforms_mac 0.0030 0.199 0.015 0.988 -0.387 0.393
platforms_linux -0.0895 0.216 -0.414 0.679 -0.513 0.334
genres_Action -0.5595 0.161 -3.473 0.001 -0.875 -0.244
genres_Free to Play -7.7073 0.257 -30.039 0.000 -8.210 -7.204
genres_Strategy 1.1245 0.186 6.055 0.000 0.760 1.489
genres_Adventure -0.0217 0.158 -0.138 0.890 -0.331 0.288
genres_Indie -4.5956 0.166 -27.752 0.000 -4.920 -4.271
genres_RPG 1.0531 0.187 5.620 0.000 0.686 1.420
genres_Casual -2.8100 0.167 -16.831 0.000 -3.137 -2.483
genres_Simulation 1.4421 0.198 7.296 0.000 1.055 1.830
genres_Racing -0.1355 0.392 -0.346 0.729 -0.904 0.633
genres_Violent 0.3475 0.695 0.500 0.617 -1.016 1.711
genres_Massively Multiplayer -1.1192 0.561 -1.995 0.046 -2.219 -0.019
genres_Nudity 1.4402 0.927 1.553 0.120 -0.378 3.258
genres_Sports 0.9563 0.407 2.352 0.019 0.159 1.753
genres_Early Access 1.0763 0.330 3.266 0.001 0.430 1.722
genres_Gore -0.4743 0.818 -0.580 0.562 -2.077 1.128
genres_Sexual Content 1.4741 0.980 1.504 0.133 -0.447 3.395
categories_Multi-player 1.1916 0.221 5.395 0.000 0.759 1.625
categories_Online Multi-Player 2.0345 0.335 6.080 0.000 1.379 2.690
categories_Local Multi-Player 0.3598 0.455 0.791 0.429 -0.532 1.252
categories_Valve Anti-Cheat enabled 3.0742 0.718 4.281 0.000 1.667 4.482
categories_Single-player 1.0659 0.360 2.964 0.003 0.361 1.771
categories_Steam Cloud 1.8655 0.170 10.988 0.000 1.533 2.198
categories_Steam Achievements 1.4979 0.181 8.287 0.000 1.144 1.852
categories_Steam Trading Cards -0.9873 0.169 -5.847 0.000 -1.318 -0.656
categories_Captions available 0.7383 0.394 1.872 0.061 -0.035 1.511
categories_Partial Controller Support 1.6088 0.201 7.986 0.000 1.214 2.004
categories_Includes Source SDK -3.2982 1.362 -2.421 0.015 -5.968 -0.628
categories_Cross-Platform Multiplayer -1.4277 0.327 -4.361 0.000 -2.070 -0.786
categories_Stats -1.2137 0.273 -4.439 0.000 -1.750 -0.678
categories_Commentary available 0.1619 0.724 0.223 0.823 -1.258 1.582
categories_Includes level editor -0.5101 0.360 -1.417 0.156 -1.216 0.195
categories_Steam Workshop 2.7755 0.362 7.658 0.000 2.065 3.486
categories_In-App Purchases -1.1512 0.371 -3.102 0.002 -1.879 -0.424
categories_Co-op 1.6304 0.320 5.098 0.000 1.003 2.257
categories_Full controller support 2.9007 0.206 14.060 0.000 2.496 3.305
categories_Steam Leaderboards -0.7060 0.221 -3.199 0.001 -1.139 -0.273
categories_SteamVR Collectibles 8.9334 1.296 6.893 0.000 6.393 11.474
categories_Online Co-op -0.2944 0.454 -0.648 0.517 -1.185 0.596
categories_Shared/Split Screen -0.4074 0.307 -1.328 0.184 -1.009 0.194
categories_Local Co-op -0.8173 0.553 -1.477 0.140 -1.902 0.267
categories_MMO 0.0942 0.624 0.151 0.880 -1.129 1.318
categories_VR Support 0.7735 0.664 1.165 0.244 -0.528 2.075
categories_Mods 0.8817 6.534 0.135 0.893 -11.927 13.691
categories_Mods (require HL2) -1.3331 9.010 -0.148 0.882 -18.996 16.330
categories_Steam Turn Notifications 1.7765 1.293 1.374 0.170 -0.759 4.312
achievements -0.0005 0.000 -2.089 0.037 -0.001 -3.17e-05
%_ratings 3.3826 0.426 7.937 0.000 2.547 4.218
QTD_ratings 7.736e-06 2.58e-06 2.998 0.003 2.68e-06 1.28e-05
owners_mean -7.349e-08 4.57e-08 -1.609 0.108 -1.63e-07 1.61e-08
average_playtime_>0 0.7185 0.154 4.658 0.000 0.416 1.021
Omnibus: 3263.985 Durbin-Watson: 1.831
Prob(Omnibus): 0.000 Jarque-Bera (JB): 18777.182
Skew: 1.767 Prob(JB): 0.00
Kurtosis: 9.410 Cond. No. 2.27e+09


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.27e+09. This might indicate that there are
strong multicollinearity or other numerical problems.
In [74]:
# Refit on the `df_max` sample with `log_price` as the response; the predictors
# are unchanged. Rebinds `regressão_max`.
X_max = sm.add_constant(df_max.loc[:, Colunas_max])
y_max = df_max.loc[:, 'log_price']
regressão_max = sm.OLS(endog=y_max, exog=X_max).fit()
regressão_max.summary()
Out[74]:
OLS Regression Results
Dep. Variable: log_price R-squared: 0.580
Model: OLS Adj. R-squared: 0.577
Method: Least Squares F-statistic: 209.5
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:57 Log-Likelihood: -8510.5
No. Observations: 8413 AIC: 1.713e+04
Df Residuals: 8357 BIC: 1.753e+04
Df Model: 55
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 4.9718 6.910 0.719 0.472 -8.574 18.518
Year -0.0017 0.003 -0.492 0.623 -0.008 0.005
english 0.1727 0.076 2.286 0.022 0.025 0.321
platforms_windows -0.4215 0.673 -0.626 0.531 -1.741 0.898
platforms_mac 0.0778 0.021 3.723 0.000 0.037 0.119
platforms_linux -0.0149 0.023 -0.656 0.512 -0.059 0.030
genres_Action -0.0388 0.017 -2.293 0.022 -0.072 -0.006
genres_Free to Play -1.6742 0.027 -62.128 0.000 -1.727 -1.621
genres_Strategy 0.1632 0.020 8.366 0.000 0.125 0.201
genres_Adventure 0.0634 0.017 3.826 0.000 0.031 0.096
genres_Indie -0.3956 0.017 -22.744 0.000 -0.430 -0.361
genres_RPG 0.1359 0.020 6.906 0.000 0.097 0.174
genres_Casual -0.3229 0.018 -18.415 0.000 -0.357 -0.289
genres_Simulation 0.2035 0.021 9.804 0.000 0.163 0.244
genres_Racing -0.0161 0.041 -0.390 0.696 -0.097 0.065
genres_Violent -0.0873 0.073 -1.196 0.232 -0.231 0.056
genres_Massively Multiplayer -0.0614 0.059 -1.041 0.298 -0.177 0.054
genres_Nudity 0.1353 0.097 1.390 0.165 -0.056 0.326
genres_Sports 0.0951 0.043 2.227 0.026 0.011 0.179
genres_Early Access 0.2169 0.035 6.267 0.000 0.149 0.285
genres_Gore 0.0095 0.086 0.111 0.912 -0.159 0.178
genres_Sexual Content 0.2024 0.103 1.967 0.049 0.001 0.404
categories_Multi-player 0.1001 0.023 4.316 0.000 0.055 0.146
categories_Online Multi-Player 0.1739 0.035 4.948 0.000 0.105 0.243
categories_Local Multi-Player 0.0521 0.048 1.090 0.276 -0.042 0.146
categories_Valve Anti-Cheat enabled 0.0676 0.075 0.897 0.370 -0.080 0.215
categories_Single-player 0.0591 0.038 1.564 0.118 -0.015 0.133
categories_Steam Cloud 0.2425 0.018 13.600 0.000 0.208 0.277
categories_Steam Achievements 0.1763 0.019 9.290 0.000 0.139 0.214
categories_Steam Trading Cards -0.0714 0.018 -4.024 0.000 -0.106 -0.037
categories_Captions available 0.0498 0.041 1.203 0.229 -0.031 0.131
categories_Partial Controller Support 0.1617 0.021 7.640 0.000 0.120 0.203
categories_Includes Source SDK -0.3575 0.143 -2.499 0.012 -0.638 -0.077
categories_Cross-Platform Multiplayer -0.1570 0.034 -4.567 0.000 -0.224 -0.090
categories_Stats -0.1040 0.029 -3.622 0.000 -0.160 -0.048
categories_Commentary available 0.0815 0.076 1.071 0.284 -0.068 0.231
categories_Includes level editor -0.0405 0.038 -1.072 0.284 -0.115 0.034
categories_Steam Workshop 0.2799 0.038 7.352 0.000 0.205 0.354
categories_In-App Purchases -0.1666 0.039 -4.274 0.000 -0.243 -0.090
categories_Co-op 0.1385 0.034 4.122 0.000 0.073 0.204
categories_Full controller support 0.3011 0.022 13.896 0.000 0.259 0.344
categories_Steam Leaderboards -0.0397 0.023 -1.712 0.087 -0.085 0.006
categories_SteamVR Collectibles 0.9491 0.136 6.972 0.000 0.682 1.216
categories_Online Co-op 0.0088 0.048 0.184 0.854 -0.085 0.102
categories_Shared/Split Screen -0.0013 0.032 -0.040 0.968 -0.064 0.062
categories_Local Co-op -0.0378 0.058 -0.651 0.515 -0.152 0.076
categories_MMO -0.0658 0.066 -1.005 0.315 -0.194 0.063
categories_VR Support 0.0631 0.070 0.905 0.365 -0.074 0.200
categories_Mods -0.0759 0.686 -0.111 0.912 -1.421 1.269
categories_Mods (require HL2) -0.1592 0.946 -0.168 0.866 -2.014 1.696
categories_Steam Turn Notifications 0.2249 0.136 1.655 0.098 -0.041 0.491
achievements -0.0001 2.59e-05 -3.949 0.000 -0.000 -5.16e-05
%_ratings 0.4881 0.045 10.904 0.000 0.400 0.576
QTD_ratings 8.256e-07 2.71e-07 3.047 0.002 2.94e-07 1.36e-06
owners_mean -1.292e-08 4.8e-09 -2.692 0.007 -2.23e-08 -3.51e-09
average_playtime_>0 -0.0019 0.016 -0.119 0.905 -0.034 0.030
Omnibus: 483.183 Durbin-Watson: 1.821
Prob(Omnibus): 0.000 Jarque-Bera (JB): 736.072
Skew: -0.491 Prob(JB): 1.46e-160
Kurtosis: 4.066 Cond. No. 2.27e+09


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.27e+09. This might indicate that there are
strong multicollinearity or other numerical problems.
In [75]:
# Refit on the `df_max` sample with `log_log_price` as the response.
# Rebinds `regressão_max`; the cells below inspect this fit.
X_max = sm.add_constant(df_max.loc[:, Colunas_max])
y_max = df_max.loc[:, 'log_log_price']
regressão_max = sm.OLS(endog=y_max, exog=X_max).fit()
regressão_max.summary()
Out[75]:
OLS Regression Results
Dep. Variable: log_log_price R-squared: 0.655
Model: OLS Adj. R-squared: 0.652
Method: Least Squares F-statistic: 287.9
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:57 Log-Likelihood: -1156.3
No. Observations: 8413 AIC: 2425.
Df Residuals: 8357 BIC: 2819.
Df Model: 55
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 16.9866 2.883 5.892 0.000 11.335 22.638
Year -0.0080 0.001 -5.621 0.000 -0.011 -0.005
english 0.0844 0.032 2.677 0.007 0.023 0.146
platforms_windows -0.1773 0.281 -0.631 0.528 -0.728 0.373
platforms_mac 0.0381 0.009 4.370 0.000 0.021 0.055
platforms_linux -0.0077 0.009 -0.813 0.416 -0.026 0.011
genres_Action -0.0081 0.007 -1.143 0.253 -0.022 0.006
genres_Free to Play -0.8975 0.011 -79.829 0.000 -0.920 -0.875
genres_Strategy 0.0649 0.008 7.977 0.000 0.049 0.081
genres_Adventure 0.0293 0.007 4.233 0.000 0.016 0.043
genres_Indie -0.1330 0.007 -18.335 0.000 -0.147 -0.119
genres_RPG 0.0512 0.008 6.235 0.000 0.035 0.067
genres_Casual -0.1150 0.007 -15.715 0.000 -0.129 -0.101
genres_Simulation 0.0776 0.009 8.957 0.000 0.061 0.095
genres_Racing -0.0021 0.017 -0.120 0.905 -0.036 0.032
genres_Violent -0.0521 0.030 -1.709 0.087 -0.112 0.008
genres_Massively Multiplayer -0.0115 0.025 -0.467 0.640 -0.060 0.037
genres_Nudity 0.0476 0.041 1.171 0.242 -0.032 0.127
genres_Sports 0.0314 0.018 1.763 0.078 -0.004 0.066
genres_Early Access 0.0869 0.014 6.019 0.000 0.059 0.115
genres_Gore 0.0148 0.036 0.413 0.680 -0.055 0.085
genres_Sexual Content 0.0860 0.043 2.002 0.045 0.002 0.170
categories_Multi-player 0.0292 0.010 3.015 0.003 0.010 0.048
categories_Online Multi-Player 0.0605 0.015 4.123 0.000 0.032 0.089
categories_Local Multi-Player 0.0202 0.020 1.013 0.311 -0.019 0.059
categories_Valve Anti-Cheat enabled -0.0052 0.031 -0.167 0.868 -0.067 0.056
categories_Single-player 0.0248 0.016 1.571 0.116 -0.006 0.056
categories_Steam Cloud 0.0981 0.007 13.182 0.000 0.083 0.113
categories_Steam Achievements 0.0674 0.008 8.511 0.000 0.052 0.083
categories_Steam Trading Cards -0.0105 0.007 -1.415 0.157 -0.025 0.004
categories_Captions available 0.0115 0.017 0.664 0.506 -0.022 0.045
categories_Partial Controller Support 0.0584 0.009 6.615 0.000 0.041 0.076
categories_Includes Source SDK -0.1298 0.060 -2.174 0.030 -0.247 -0.013
categories_Cross-Platform Multiplayer -0.0609 0.014 -4.247 0.000 -0.089 -0.033
categories_Stats -0.0368 0.012 -3.073 0.002 -0.060 -0.013
categories_Commentary available 0.0376 0.032 1.185 0.236 -0.025 0.100
categories_Includes level editor -0.0125 0.016 -0.793 0.428 -0.043 0.018
categories_Steam Workshop 0.0975 0.016 6.137 0.000 0.066 0.129
categories_In-App Purchases -0.0778 0.016 -4.782 0.000 -0.110 -0.046
categories_Co-op 0.0437 0.014 3.116 0.002 0.016 0.071
categories_Full controller support 0.1057 0.009 11.690 0.000 0.088 0.123
categories_Steam Leaderboards -0.0081 0.010 -0.834 0.404 -0.027 0.011
categories_SteamVR Collectibles 0.3519 0.057 6.196 0.000 0.241 0.463
categories_Online Co-op 0.0120 0.020 0.604 0.546 -0.027 0.051
categories_Shared/Split Screen 0.0056 0.013 0.418 0.676 -0.021 0.032
categories_Local Co-op -0.0076 0.024 -0.316 0.752 -0.055 0.040
categories_MMO -0.0378 0.027 -1.382 0.167 -0.091 0.016
categories_VR Support 0.0087 0.029 0.298 0.766 -0.048 0.066
categories_Mods -0.0481 0.286 -0.168 0.867 -0.609 0.513
categories_Mods (require HL2) -0.0631 0.395 -0.160 0.873 -0.837 0.711
categories_Steam Turn Notifications 0.0905 0.057 1.598 0.110 -0.021 0.202
achievements -4.553e-05 1.08e-05 -4.210 0.000 -6.67e-05 -2.43e-05
%_ratings 0.1698 0.019 9.093 0.000 0.133 0.206
QTD_ratings 3.355e-07 1.13e-07 2.968 0.003 1.14e-07 5.57e-07
owners_mean -5.97e-09 2e-09 -2.982 0.003 -9.89e-09 -2.05e-09
average_playtime_>0 -0.0094 0.007 -1.395 0.163 -0.023 0.004
Omnibus: 2275.705 Durbin-Watson: 1.835
Prob(Omnibus): 0.000 Jarque-Bera (JB): 6998.792
Skew: -1.388 Prob(JB): 0.00
Kurtosis: 6.501 Cond. No. 2.27e+09


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.27e+09. This might indicate that there are
strong multicollinearity or other numerical problems.
In [76]:
# Coefficients of the current `regressão_max` fit, rounded to 3 decimals for a compact view.
regressão_max.params.round(decimals=3)
Out[76]:
const                                    16.987
Year                                     -0.008
english                                   0.084
platforms_windows                        -0.177
platforms_mac                             0.038
platforms_linux                          -0.008
genres_Action                            -0.008
genres_Free to Play                      -0.898
genres_Strategy                           0.065
genres_Adventure                          0.029
genres_Indie                             -0.133
genres_RPG                                0.051
genres_Casual                            -0.115
genres_Simulation                         0.078
genres_Racing                            -0.002
genres_Violent                           -0.052
genres_Massively Multiplayer             -0.011
genres_Nudity                             0.048
genres_Sports                             0.031
genres_Early Access                       0.087
genres_Gore                               0.015
genres_Sexual Content                     0.086
categories_Multi-player                   0.029
categories_Online Multi-Player            0.060
categories_Local Multi-Player             0.020
categories_Valve Anti-Cheat enabled      -0.005
categories_Single-player                  0.025
categories_Steam Cloud                    0.098
categories_Steam Achievements             0.067
categories_Steam Trading Cards           -0.010
categories_Captions available             0.011
categories_Partial Controller Support     0.058
categories_Includes Source SDK           -0.130
categories_Cross-Platform Multiplayer    -0.061
categories_Stats                         -0.037
categories_Commentary available           0.038
categories_Includes level editor         -0.013
categories_Steam Workshop                 0.097
categories_In-App Purchases              -0.078
categories_Co-op                          0.044
categories_Full controller support        0.106
categories_Steam Leaderboards            -0.008
categories_SteamVR Collectibles           0.352
categories_Online Co-op                   0.012
categories_Shared/Split Screen            0.006
categories_Local Co-op                   -0.008
categories_MMO                           -0.038
categories_VR Support                     0.009
categories_Mods                          -0.048
categories_Mods (require HL2)            -0.063
categories_Steam Turn Notifications       0.091
achievements                             -0.000
%_ratings                                 0.170
QTD_ratings                               0.000
owners_mean                              -0.000
average_playtime_>0                      -0.009
dtype: float64
In [77]:
# Per-coefficient p-values of the current `regressão_max` fit, rounded to 3 decimals for display.
regressão_max.pvalues.round(decimals=3)
Out[77]:
const                                    0.000
Year                                     0.000
english                                  0.007
platforms_windows                        0.528
platforms_mac                            0.000
platforms_linux                          0.416
genres_Action                            0.253
genres_Free to Play                      0.000
genres_Strategy                          0.000
genres_Adventure                         0.000
genres_Indie                             0.000
genres_RPG                               0.000
genres_Casual                            0.000
genres_Simulation                        0.000
genres_Racing                            0.905
genres_Violent                           0.087
genres_Massively Multiplayer             0.640
genres_Nudity                            0.242
genres_Sports                            0.078
genres_Early Access                      0.000
genres_Gore                              0.680
genres_Sexual Content                    0.045
categories_Multi-player                  0.003
categories_Online Multi-Player           0.000
categories_Local Multi-Player            0.311
categories_Valve Anti-Cheat enabled      0.868
categories_Single-player                 0.116
categories_Steam Cloud                   0.000
categories_Steam Achievements            0.000
categories_Steam Trading Cards           0.157
categories_Captions available            0.506
categories_Partial Controller Support    0.000
categories_Includes Source SDK           0.030
categories_Cross-Platform Multiplayer    0.000
categories_Stats                         0.002
categories_Commentary available          0.236
categories_Includes level editor         0.428
categories_Steam Workshop                0.000
categories_In-App Purchases              0.000
categories_Co-op                         0.002
categories_Full controller support       0.000
categories_Steam Leaderboards            0.404
categories_SteamVR Collectibles          0.000
categories_Online Co-op                  0.546
categories_Shared/Split Screen           0.676
categories_Local Co-op                   0.752
categories_MMO                           0.167
categories_VR Support                    0.766
categories_Mods                          0.867
categories_Mods (require HL2)            0.873
categories_Steam Turn Notifications      0.110
achievements                             0.000
%_ratings                                0.000
QTD_ratings                              0.003
owners_mean                              0.003
average_playtime_>0                      0.163
dtype: float64
In [78]:
# Sanity check: show the container type of the rounded p-values
# (the following cells rely on boolean masking and `.index`).
type(regressão_max.pvalues.round(decimals=3))
Out[78]:
pandas.core.series.Series
In [79]:
# p-values of the current `regressão_max` fit, indexed by term name.
# Kept at full precision on purpose: rounding to 3 decimals before the
# `>= 0.05` comparison used by the next cells would turn e.g. 0.0496 into
# 0.050 and wrongly mark a significant predictor for removal.
# `.copy()` keeps `colunas` independent from the results object.
colunas = regressão_max.pvalues.copy()
In [80]:
# Names of the terms whose p-value is at or above the 5% level.
colunas.loc[colunas.ge(0.05)].index.tolist()
Out[80]:
['platforms_windows',
 'platforms_linux',
 'genres_Action',
 'genres_Racing',
 'genres_Violent',
 'genres_Massively Multiplayer',
 'genres_Nudity',
 'genres_Sports',
 'genres_Gore',
 'categories_Local Multi-Player',
 'categories_Valve Anti-Cheat enabled',
 'categories_Single-player',
 'categories_Steam Trading Cards',
 'categories_Captions available',
 'categories_Commentary available',
 'categories_Includes level editor',
 'categories_Steam Leaderboards',
 'categories_Online Co-op',
 'categories_Shared/Split Screen',
 'categories_Local Co-op',
 'categories_MMO',
 'categories_VR Support',
 'categories_Mods',
 'categories_Mods (require HL2)',
 'categories_Steam Turn Notifications',
 'average_playtime_>0']
In [81]:
# Drop from `Colunas_max` every predictor whose p-value is at or above the 5% level.
# Filtering (instead of calling `list.remove` once per name) keeps this cell
# safe to re-run and ignores labels that are not in the list, such as the
# intercept `const`; `list.remove` raises ValueError for a missing element.
nao_significativas = set(colunas[colunas >= 0.05].index)
Colunas_max = [col for col in Colunas_max if col not in nao_significativas]
In [82]:
# Show the predictors that remain in `Colunas_max` at this point.
Colunas_max
Out[82]:
['Year',
 'english',
 'platforms_mac',
 'genres_Free to Play',
 'genres_Strategy',
 'genres_Adventure',
 'genres_Indie',
 'genres_RPG',
 'genres_Casual',
 'genres_Simulation',
 'genres_Early Access',
 'genres_Sexual Content',
 'categories_Multi-player',
 'categories_Online Multi-Player',
 'categories_Steam Cloud',
 'categories_Steam Achievements',
 'categories_Partial Controller Support',
 'categories_Includes Source SDK',
 'categories_Cross-Platform Multiplayer',
 'categories_Stats',
 'categories_Steam Workshop',
 'categories_In-App Purchases',
 'categories_Co-op',
 'categories_Full controller support',
 'categories_SteamVR Collectibles',
 'achievements',
 '%_ratings',
 'QTD_ratings',
 'owners_mean']
In [83]:
# Refit `log_log_price` on the `df_max` sample using whatever predictors are
# currently left in `Colunas_max`. Rebinds `regressão_max`.
X_max = sm.add_constant(df_max.loc[:, Colunas_max])
y_max = df_max.loc[:, 'log_log_price']
regressão_max = sm.OLS(endog=y_max, exog=X_max).fit()
regressão_max.summary()
Out[83]:
OLS Regression Results
Dep. Variable: log_log_price R-squared: 0.653
Model: OLS Adj. R-squared: 0.652
Method: Least Squares F-statistic: 543.6
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:57 Log-Likelihood: -1176.7
No. Observations: 8413 AIC: 2413.
Df Residuals: 8383 BIC: 2625.
Df Model: 29
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 16.2402 2.771 5.861 0.000 10.808 21.672
Year -0.0077 0.001 -5.607 0.000 -0.010 -0.005
english 0.0810 0.031 2.575 0.010 0.019 0.143
platforms_mac 0.0348 0.007 5.032 0.000 0.021 0.048
genres_Free to Play -0.9038 0.011 -86.013 0.000 -0.924 -0.883
genres_Strategy 0.0677 0.008 8.547 0.000 0.052 0.083
genres_Adventure 0.0285 0.007 4.205 0.000 0.015 0.042
genres_Indie -0.1361 0.007 -19.185 0.000 -0.150 -0.122
genres_RPG 0.0455 0.008 5.705 0.000 0.030 0.061
genres_Casual -0.1116 0.007 -15.530 0.000 -0.126 -0.098
genres_Simulation 0.0832 0.008 9.882 0.000 0.067 0.100
genres_Early Access 0.0829 0.014 5.833 0.000 0.055 0.111
genres_Sexual Content 0.1084 0.033 3.285 0.001 0.044 0.173
categories_Multi-player 0.0275 0.009 2.991 0.003 0.009 0.046
categories_Online Multi-Player 0.0592 0.012 4.918 0.000 0.036 0.083
categories_Steam Cloud 0.0971 0.007 13.244 0.000 0.083 0.111
categories_Steam Achievements 0.0632 0.008 8.325 0.000 0.048 0.078
categories_Partial Controller Support 0.0585 0.009 6.787 0.000 0.042 0.075
categories_Includes Source SDK -0.1242 0.056 -2.216 0.027 -0.234 -0.014
categories_Cross-Platform Multiplayer -0.0606 0.014 -4.264 0.000 -0.088 -0.033
categories_Stats -0.0381 0.012 -3.304 0.001 -0.061 -0.015
categories_Steam Workshop 0.0894 0.014 6.471 0.000 0.062 0.116
categories_In-App Purchases -0.0891 0.016 -5.616 0.000 -0.120 -0.058
categories_Co-op 0.0432 0.010 4.227 0.000 0.023 0.063
categories_Full controller support 0.1056 0.009 12.370 0.000 0.089 0.122
categories_SteamVR Collectibles 0.3536 0.055 6.466 0.000 0.246 0.461
achievements -4.621e-05 1.08e-05 -4.278 0.000 -6.74e-05 -2.5e-05
%_ratings 0.1785 0.018 9.681 0.000 0.142 0.215
QTD_ratings 3.511e-07 1.13e-07 3.111 0.002 1.3e-07 5.72e-07
owners_mean -6.78e-09 1.97e-09 -3.439 0.001 -1.06e-08 -2.92e-09
Omnibus: 2244.723 Durbin-Watson: 1.835
Prob(Omnibus): 0.000 Jarque-Bera (JB): 6788.798
Skew: -1.376 Prob(JB): 0.00
Kurtosis: 6.435 Cond. No. 2.18e+09


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.18e+09. This might indicate that there are
strong multicollinearity or other numerical problems.
In [84]:
# Pairwise Pearson correlations (the pandas default) between the selected predictors.
df_max.loc[:, Colunas_max].corr(method='pearson')
Out[84]:
Year english platforms_mac genres_Free to Play genres_Strategy genres_Adventure genres_Indie genres_RPG genres_Casual genres_Simulation ... categories_Stats categories_Steam Workshop categories_In-App Purchases categories_Co-op categories_Full controller support categories_SteamVR Collectibles achievements %_ratings QTD_ratings owners_mean
Year 1.000000 -0.082978 0.015472 0.236272 -0.043806 0.104617 0.271935 0.043699 0.140550 0.118410 ... 0.010357 0.040704 0.184621 0.045730 0.113789 0.034240 0.067032 -0.071347 -0.034609 -0.110494
english -0.082978 1.000000 0.050706 -0.047607 -0.008205 -0.005878 0.019551 -0.057199 0.004411 0.000797 ... 0.016513 0.019417 -0.021621 0.035900 0.036415 0.005595 0.006466 -0.013599 0.006797 0.011206
platforms_mac 0.015472 0.050706 1.000000 -0.040892 0.059965 0.067276 0.217872 -0.008779 0.060104 0.021443 ... 0.073569 0.104577 -0.016235 -0.008482 0.090161 -0.040691 -0.005986 0.197562 0.031426 0.040690
genres_Free to Play 0.236272 -0.047607 -0.040892 1.000000 0.032072 -0.079276 -0.018441 0.045413 0.068011 0.010515 ... 0.000427 -0.052947 0.464180 0.079062 -0.134629 0.001210 -0.022465 -0.065258 0.037751 0.075699
genres_Strategy -0.043806 -0.008205 0.059965 0.032072 1.000000 -0.203391 -0.024716 0.060289 -0.030255 0.147153 ... 0.030511 0.093756 0.104867 0.030191 -0.155202 -0.005351 0.019209 -0.039162 -0.013012 -0.004210
genres_Adventure 0.104617 -0.005878 0.067276 -0.079276 -0.203391 1.000000 0.114411 0.117141 0.004457 -0.137481 ... -0.056062 -0.073111 -0.069771 -0.037174 0.087080 0.000475 -0.018926 0.027102 -0.005821 -0.023324
genres_Indie 0.271935 0.019551 0.217872 -0.018441 -0.024716 0.114411 1.000000 0.000797 0.168210 -0.002334 ... 0.088599 0.044996 -0.128760 -0.018569 0.107005 0.000336 0.033502 0.040181 -0.050968 -0.086617
genres_RPG 0.043699 -0.057199 -0.008779 0.045413 0.060289 0.117141 0.000797 1.000000 -0.126580 -0.039991 ... -0.011186 -0.019813 0.077305 0.066478 0.011069 -0.016932 0.010250 0.007115 0.005591 0.001766
genres_Casual 0.140550 0.004411 0.060104 0.068011 -0.030255 0.004457 0.168210 -0.126580 1.000000 0.035898 ... 0.011685 -0.059453 -0.001554 -0.095298 -0.096994 0.002146 0.075496 -0.026894 -0.038824 -0.054940
genres_Simulation 0.118410 0.000797 0.021443 0.010515 0.147153 -0.137481 -0.002334 -0.039991 0.035898 1.000000 ... 0.010936 0.157201 0.026719 0.009901 -0.121232 0.012420 -0.013809 -0.056964 -0.005281 -0.015097
genres_Early Access 0.156809 -0.008173 -0.053678 0.117839 0.035959 -0.006464 0.070170 0.043867 -0.022757 0.115386 ... 0.027013 0.055001 0.078209 0.128405 -0.067567 0.013909 -0.007789 -0.041432 -0.004820 -0.012992
genres_Sexual Content 0.054906 -0.003900 0.012657 0.028475 -0.034986 -0.007296 0.027150 -0.012959 0.056012 -0.002834 ... -0.028207 -0.017885 -0.018067 -0.033698 -0.017601 -0.005309 0.015317 0.035679 -0.003583 -0.008454
categories_Multi-player -0.129843 0.037421 -0.056713 0.114288 0.155933 -0.217761 -0.172798 -0.016796 -0.158229 0.039178 ... 0.113883 0.126697 0.155165 0.427217 -0.005881 0.003247 -0.008059 -0.041550 0.078673 0.112265
categories_Online Multi-Player 0.238129 -0.008404 -0.030163 0.267235 0.084508 -0.108480 -0.056573 0.035293 -0.065092 0.042011 ... 0.077435 0.112956 0.397258 0.345696 -0.004689 0.024209 -0.008671 -0.046856 0.029614 0.045289
categories_Steam Cloud 0.036612 -0.013761 0.149301 -0.198136 0.032948 0.050398 0.019819 0.054046 -0.063034 -0.017554 ... 0.128826 0.150431 -0.107761 0.046337 0.309192 -0.003722 0.016706 0.238371 0.012285 0.006290
categories_Steam Achievements 0.198357 0.028044 0.224662 -0.142167 -0.005507 0.050325 0.225157 0.035279 0.017605 -0.029639 ... 0.204219 0.119656 -0.048793 0.054982 0.331717 -0.004717 0.106578 0.183885 0.025047 0.001920
categories_Partial Controller Support -0.023010 0.007794 -0.029121 -0.024322 -0.123526 0.008459 0.014906 -0.015327 -0.065926 -0.005751 ... 0.041487 0.025170 -0.021067 0.067571 -0.257041 -0.026442 -0.013671 0.000122 0.007791 0.012629
categories_Includes Source SDK -0.072379 0.005490 0.030609 0.057522 -0.004300 -0.029001 -0.014593 -0.027096 -0.020345 -0.020101 ... 0.083944 0.077283 0.004151 0.039845 -0.016815 -0.003159 0.001995 0.031617 0.031046 0.109474
categories_Cross-Platform Multiplayer 0.051877 0.021563 0.212020 0.177166 0.157598 -0.087415 0.037836 0.025366 -0.021760 0.012211 ... 0.096315 0.111195 0.230484 0.258109 -0.003721 0.009999 0.009512 -0.001590 0.020451 0.027436
categories_Stats 0.010357 0.016513 0.073569 0.000427 0.030511 -0.056062 0.088599 -0.011186 0.011685 0.010936 ... 1.000000 0.151841 0.028080 0.088999 0.063991 -0.001899 0.034449 0.039899 0.056706 0.051619
categories_Steam Workshop 0.040704 0.019417 0.104577 -0.052947 0.093756 -0.073111 0.044996 -0.019813 -0.059453 0.157201 ... 0.151841 1.000000 -0.013372 0.120398 0.042400 0.003738 0.012106 0.103074 0.112367 0.109260
categories_In-App Purchases 0.184621 -0.021621 -0.016235 0.464180 0.104867 -0.069771 -0.128760 0.077305 -0.001554 0.026719 ... 0.028080 -0.013372 1.000000 0.118510 -0.087035 -0.005351 -0.000845 -0.117775 0.087137 0.112300
categories_Co-op 0.045730 0.035900 -0.008482 0.079062 0.030191 -0.037174 -0.018569 0.066478 -0.095298 0.009901 ... 0.088999 0.120398 0.118510 1.000000 0.113303 0.032167 0.007310 0.013091 0.056475 0.087425
categories_Full controller support 0.113789 0.036415 0.090161 -0.134629 -0.155202 0.087080 0.107005 0.011069 -0.096994 -0.121232 ... 0.063991 0.042400 -0.087035 0.113303 1.000000 -0.017700 -0.006013 0.170011 0.038892 0.012989
categories_SteamVR Collectibles 0.034240 0.005595 -0.040691 0.001210 -0.005351 0.000475 0.000336 -0.016932 0.002146 0.012420 ... -0.001899 0.003738 -0.005351 0.032167 -0.017700 1.000000 -0.004413 0.043450 0.049613 0.127850
achievements 0.067032 0.006466 -0.005986 -0.022465 0.019209 -0.018926 0.033502 0.010250 0.075496 -0.013809 ... 0.034449 0.012106 -0.000845 0.007310 -0.006013 -0.004413 1.000000 -0.025386 0.013696 0.009161
%_ratings -0.071347 -0.013599 0.197562 -0.065258 -0.039162 0.027102 0.040181 0.007115 -0.026894 -0.056964 ... 0.039899 0.103074 -0.117775 0.013091 0.170011 0.043450 -0.025386 1.000000 0.051687 0.067987
QTD_ratings -0.034609 0.006797 0.031426 0.037751 -0.013012 -0.005821 -0.050968 0.005591 -0.038824 -0.005281 ... 0.056706 0.112367 0.087137 0.056475 0.038892 0.049613 0.013696 0.051687 1.000000 0.734385
owners_mean -0.110494 0.011206 0.040690 0.075699 -0.004210 -0.023324 -0.086617 0.001766 -0.054940 -0.015097 ... 0.051619 0.109260 0.112300 0.087425 0.012989 0.127850 0.009161 0.067987 0.734385 1.000000

29 rows × 29 columns

In [85]:
# Heatmap of the predictor correlation matrix; xticklabels/yticklabels=True
# forces a label for every row and column instead of seaborn's automatic thinning.
ax = sns.heatmap(
    data=df_max.loc[:, Colunas_max].corr(),
    xticklabels=True,
    yticklabels=True,
)
# Enlarge the figure so all labels fit.
ax.figure.set_size_inches(15, 15)

Os números do terceiro grupo (jogos com média de owners superior a 10 mil) parecem mais animadores. Vale a pena estudar esse grupo um pouco mais.
Além disso, parece existir uma correlação alta (≈ 0,73) entre a média de owners (owners_mean) e a quantidade de ratings (QTD_ratings). Como o p-value de owners_mean é menor, vou descartar QTD_ratings e rodar a regressão novamente.

In [86]:
# Refit without QTD_ratings, which is strongly correlated with owners_mean
# (about 0.73 in the correlation matrix displayed earlier).
# Guarded removal keeps the cell re-runnable: an unconditional
# Colunas_max.remove(...) raises ValueError on a second execution.
if 'QTD_ratings' in Colunas_max:
    Colunas_max.remove('QTD_ratings')
y_max = df_max['log_log_price']
X_max = sm.add_constant(df_max[Colunas_max])
regressão_max = sm.OLS(y_max, X_max).fit()
regressão_max.summary()
Out[86]:
OLS Regression Results
Dep. Variable: log_log_price R-squared: 0.652
Model: OLS Adj. R-squared: 0.651
Method: Least Squares F-statistic: 562.1
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:59 Log-Likelihood: -1181.5
No. Observations: 8413 AIC: 2421.
Df Residuals: 8384 BIC: 2625.
Df Model: 28
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 15.6777 2.767 5.667 0.000 10.255 21.101
Year -0.0074 0.001 -5.413 0.000 -0.010 -0.005
english 0.0811 0.031 2.577 0.010 0.019 0.143
platforms_mac 0.0345 0.007 4.999 0.000 0.021 0.048
genres_Free to Play -0.9049 0.011 -86.133 0.000 -0.926 -0.884
genres_Strategy 0.0674 0.008 8.510 0.000 0.052 0.083
genres_Adventure 0.0286 0.007 4.225 0.000 0.015 0.042
genres_Indie -0.1362 0.007 -19.195 0.000 -0.150 -0.122
genres_RPG 0.0456 0.008 5.714 0.000 0.030 0.061
genres_Casual -0.1116 0.007 -15.523 0.000 -0.126 -0.098
genres_Simulation 0.0832 0.008 9.871 0.000 0.067 0.100
genres_Early Access 0.0832 0.014 5.847 0.000 0.055 0.111
genres_Sexual Content 0.1087 0.033 3.290 0.001 0.044 0.173
categories_Multi-player 0.0279 0.009 3.037 0.002 0.010 0.046
categories_Online Multi-Player 0.0584 0.012 4.847 0.000 0.035 0.082
categories_Steam Cloud 0.0967 0.007 13.192 0.000 0.082 0.111
categories_Steam Achievements 0.0633 0.008 8.331 0.000 0.048 0.078
categories_Partial Controller Support 0.0587 0.009 6.799 0.000 0.042 0.076
categories_Includes Source SDK -0.1367 0.056 -2.444 0.015 -0.246 -0.027
categories_Cross-Platform Multiplayer -0.0602 0.014 -4.232 0.000 -0.088 -0.032
categories_Stats -0.0372 0.012 -3.225 0.001 -0.060 -0.015
categories_Steam Workshop 0.0913 0.014 6.615 0.000 0.064 0.118
categories_In-App Purchases -0.0883 0.016 -5.564 0.000 -0.119 -0.057
categories_Co-op 0.0426 0.010 4.161 0.000 0.023 0.063
categories_Full controller support 0.1062 0.009 12.447 0.000 0.090 0.123
categories_SteamVR Collectibles 0.3419 0.055 6.263 0.000 0.235 0.449
achievements -4.611e-05 1.08e-05 -4.267 0.000 -6.73e-05 -2.49e-05
%_ratings 0.1788 0.018 9.693 0.000 0.143 0.215
owners_mean -2.313e-09 1.35e-09 -1.711 0.087 -4.96e-09 3.37e-10
Omnibus: 2243.426 Durbin-Watson: 1.837
Prob(Omnibus): 0.000 Jarque-Bera (JB): 6792.928
Skew: -1.374 Prob(JB): 0.00
Kurtosis: 6.439 Cond. No. 2.18e+09


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.18e+09. This might indicate that there are
strong multicollinearity or other numerical problems.

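Aparentemente, isso não foi uma boa ideia, já que tanto o R² quanto o R² ajustado diminuíram (de 0,653 para 0,652 e de 0,652 para 0,651, respectivamente) e owners_mean deixou de ser significativo a 5% (p-value = 0,087). Vou voltar para a regressão anterior.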

In [87]:
# Restore QTD_ratings and refit the previous specification.
# Append only when the column is absent: running this cell twice would
# otherwise list QTD_ratings twice and put a duplicated (perfectly collinear)
# regressor into the design matrix.
if 'QTD_ratings' not in Colunas_max:
    Colunas_max.append('QTD_ratings')
y_max = df_max['log_log_price']
X_max = sm.add_constant(df_max[Colunas_max])
regressão_max = sm.OLS(y_max, X_max).fit()
regressão_max.summary()
Out[87]:
OLS Regression Results
Dep. Variable: log_log_price R-squared: 0.653
Model: OLS Adj. R-squared: 0.652
Method: Least Squares F-statistic: 543.6
Date: Sun, 24 Oct 2021 Prob (F-statistic): 0.00
Time: 13:28:59 Log-Likelihood: -1176.7
No. Observations: 8413 AIC: 2413.
Df Residuals: 8383 BIC: 2625.
Df Model: 29
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 16.2402 2.771 5.861 0.000 10.808 21.672
Year -0.0077 0.001 -5.607 0.000 -0.010 -0.005
english 0.0810 0.031 2.575 0.010 0.019 0.143
platforms_mac 0.0348 0.007 5.032 0.000 0.021 0.048
genres_Free to Play -0.9038 0.011 -86.013 0.000 -0.924 -0.883
genres_Strategy 0.0677 0.008 8.547 0.000 0.052 0.083
genres_Adventure 0.0285 0.007 4.205 0.000 0.015 0.042
genres_Indie -0.1361 0.007 -19.185 0.000 -0.150 -0.122
genres_RPG 0.0455 0.008 5.705 0.000 0.030 0.061
genres_Casual -0.1116 0.007 -15.530 0.000 -0.126 -0.098
genres_Simulation 0.0832 0.008 9.882 0.000 0.067 0.100
genres_Early Access 0.0829 0.014 5.833 0.000 0.055 0.111
genres_Sexual Content 0.1084 0.033 3.285 0.001 0.044 0.173
categories_Multi-player 0.0275 0.009 2.991 0.003 0.009 0.046
categories_Online Multi-Player 0.0592 0.012 4.918 0.000 0.036 0.083
categories_Steam Cloud 0.0971 0.007 13.244 0.000 0.083 0.111
categories_Steam Achievements 0.0632 0.008 8.325 0.000 0.048 0.078
categories_Partial Controller Support 0.0585 0.009 6.787 0.000 0.042 0.075
categories_Includes Source SDK -0.1242 0.056 -2.216 0.027 -0.234 -0.014
categories_Cross-Platform Multiplayer -0.0606 0.014 -4.264 0.000 -0.088 -0.033
categories_Stats -0.0381 0.012 -3.304 0.001 -0.061 -0.015
categories_Steam Workshop 0.0894 0.014 6.471 0.000 0.062 0.116
categories_In-App Purchases -0.0891 0.016 -5.616 0.000 -0.120 -0.058
categories_Co-op 0.0432 0.010 4.227 0.000 0.023 0.063
categories_Full controller support 0.1056 0.009 12.370 0.000 0.089 0.122
categories_SteamVR Collectibles 0.3536 0.055 6.466 0.000 0.246 0.461
achievements -4.621e-05 1.08e-05 -4.278 0.000 -6.74e-05 -2.5e-05
%_ratings 0.1785 0.018 9.681 0.000 0.142 0.215
owners_mean -6.78e-09 1.97e-09 -3.439 0.001 -1.06e-08 -2.92e-09
QTD_ratings 3.511e-07 1.13e-07 3.111 0.002 1.3e-07 5.72e-07
Omnibus: 2244.723 Durbin-Watson: 1.835
Prob(Omnibus): 0.000 Jarque-Bera (JB): 6788.798
Skew: -1.376 Prob(JB): 0.00
Kurtosis: 6.435 Cond. No. 2.18e+09


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.18e+09. This might indicate that there are
strong multicollinearity or other numerical problems.

Bom, acho que não custa nada tentar criar um modelo no sklearn para avaliar se essas variáveis são bons preditores do preço de um jogo na Steam.

In [88]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
In [89]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# shuffle -- and therefore every score computed from this split --
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X_max, y_max, test_size=0.2, random_state=42
)
In [90]:
# Fit scikit-learn's least-squares linear model on the training split.
# NOTE(review): X_train is cut from X_max, which already carries the column
# added by sm.add_constant; LinearRegression also fits its own intercept by
# default, so that column looks redundant here -- confirm whether it should be
# dropped before fitting.
modelo = LinearRegression().fit(X=X_train, y=y_train)
In [91]:
# Score of the fitted model on the training split (R² for LinearRegression).
modelo.score(X_train, y_train)
Out[91]:
0.6483608304355883
In [92]:
# Predict the target (log_log_price) for the held-out rows.
y_previsto = modelo.predict(X_test)
In [93]:
from sklearn import metrics

# Out-of-sample R², rounded to three decimals.
# Use the built-in round() rather than the .round() method: newer scikit-learn
# releases return a plain Python float from r2_score, which has no .round()
# attribute, while round() works for both float and NumPy scalars.
round(metrics.r2_score(y_test, y_previsto), 3)
Out[93]:
0.669
In [94]:
# Number of held-out observations.
len(y_test)
Out[94]:
1683
In [95]:
# Check the container type of the held-out targets before combining them with the predictions.
type(y_test)
Out[95]:
pandas.core.series.Series
In [96]:
# Number of predictions; expected to equal len(y_test).
len(y_previsto)
Out[96]:
1683
In [97]:
# Check the container type returned by modelo.predict.
type(y_previsto)
Out[97]:
numpy.ndarray
In [98]:
# Pair the actual and predicted targets under the column labels of the comparison table.
d = {'log_log_Y':y_test, 'log_log_Y^':y_previsto}
In [99]:
# Build the actual-vs-predicted comparison frame from the dict `d`.
df_y = pd.DataFrame(d)
In [100]:
# Preview actual vs. predicted values on the log-log scale.
df_y
Out[100]:
log_log_Y log_log_Y^
8208 0.000000 -0.232962
2093 1.124342 1.041000
6425 1.079932 1.102279
4802 0.000000 -0.065669
924 1.257577 0.915709
... ... ...
4719 0.668679 1.026345
2831 0.846883 0.840389
2938 1.489226 1.015788
1375 1.026074 0.910699
3259 0.000000 1.078248

1683 rows × 2 columns

In [101]:
# Residual on the log-log scale: actual minus predicted.
df_y['log_log_û'] = df_y['log_log_Y'].sub(df_y['log_log_Y^'])
In [102]:
# Preview the frame with the log-log residual column added.
df_y
Out[102]:
log_log_Y log_log_Y^ log_log_û
8208 0.000000 -0.232962 0.232962
2093 1.124342 1.041000 0.083342
6425 1.079932 1.102279 -0.022346
4802 0.000000 -0.065669 0.065669
924 1.257577 0.915709 0.341868
... ... ... ...
4719 0.668679 1.026345 -0.357666
2831 0.846883 0.840389 0.006494
2938 1.489226 1.015788 0.473438
1375 1.026074 0.910699 0.115375
3259 0.000000 1.078248 -1.078248

1683 rows × 3 columns

In [103]:
# Map the actual target back to the price scale by applying exp twice.
# NOTE(review): exp(exp(x)) is the exact inverse of log(log(price)); rows with
# log_log_Y == 0 therefore come out as e (about 2.72). If log_log_price was
# built with offsets (e.g. log(price + 1)), the inverse must subtract them --
# confirm against the preprocessing that produced the column.
df_y['Y'] = np.exp(np.exp(df_y['log_log_Y']))
In [104]:
# Map the predictions back to the price scale by applying exp twice.
# NOTE(review): this must mirror however log_log_price was constructed;
# confirm the forward transform used no offsets before trusting these values.
df_y['Y^'] = np.exp(np.exp(df_y['log_log_Y^']))
In [105]:
# Residual on the back-transformed scale: actual minus predicted.
df_y['û'] = df_y['Y'].sub(df_y['Y^'])
In [106]:
# Preview the frame with the back-transformed columns and their residual.
df_y
Out[106]:
log_log_Y log_log_Y^ log_log_û Y Y^ û
8208 0.000000 -0.232962 0.232962 2.718282 2.208213 0.510068
2093 1.124342 1.041000 0.083342 21.719072 16.980203 4.738869
6425 1.079932 1.102279 -0.022346 19.000790 20.308087 -1.307297
4802 0.000000 -0.065669 0.065669 2.718282 2.550886 0.167396
924 1.257577 0.915709 0.341868 33.679512 12.164785 21.514727
... ... ... ... ... ... ...
4719 0.668679 1.026345 -0.357666 7.040350 16.294803 -9.254453
2831 0.846883 0.840389 0.006494 10.302288 10.147926 0.154362
2938 1.489226 1.015788 0.473438 84.239554 15.824179 68.415375
1375 1.026074 0.910699 0.115375 16.282508 12.013842 4.268667
3259 0.000000 1.078248 -1.078248 2.718282 18.906836 -16.188554

1683 rows × 6 columns

Bom, conseguimos observar que esse definitivamente não é o melhor modelo para prever os preços dos jogos da Steam; porém, é visível o efeito das variáveis sobre os jogos mais populares. Dessa forma, podemos dizer que, se o jogo tiver uma média de detentores (owners) superior a 10 mil, o poder explicativo do modelo torna-se maior.