這是小雷在Google Data Analytics課程中最後一階段的實作項目。隨著實作內容開始增多,現在回頭來看這份分析文件,坦白說會覺得缺少了很多,雖然很不想放上來被恥笑,但這總是一個學習的過程,總是由不會到會,由不熟悉到熟悉,也作為自己每個階段回顧時是否有更進一步的證明。這份分析報告中,都是自己從無到有,有遇到很多奇奇怪怪困難的點,一一地查找資料後慢慢做出來,也找了、看了很多與健身相關的文章,剛開始做出來時真的是非常難以言喻的興奮。未來小雷可能會再做一份新的研究報告,但是舊的這份會一直留存,是開始另一個階段自我成長的足跡。
這份分析報告有很多地方需要調整,所以在語法上就不多做說明;代碼執行結果有很多會跟背景同色,所以就刪除掉了,有興趣的朋友可以自行參考或上小雷的Kaggle閱覽~
小雷的Kaggle : 請點我
In [1]:
library(tidyverse)
library(dplyr)
library(tidyr)
library(here)
library(skimr)
library(janitor)
library(lubridate)
In [2]:
# Load the five Fitbit CSV exports used in this analysis from the shared
# dataset directory.
data_dir <- "../input/fitbit/Fitabase Data 4.12.16-5.12.16"
activity <- read.csv(file.path(data_dir, "dailyActivity_merged.csv")) # merged daily activity data
mets <- read.csv(file.path(data_dir, "minuteMETsNarrow_merged.csv")) # per-minute metabolic equivalents (METs)
rate <- read.csv(file.path(data_dir, "heartrate_seconds_merged.csv")) # heart rate
bmi <- read.csv(file.path(data_dir, "weightLogInfo_merged.csv")) # weight log info
sleep <- read.csv(file.path(data_dir, "sleepDay_merged.csv")) # daily sleep
In [3]:
# List the distinct participant ids present in each table. The trailing
# numbers are the counts the author recorded when running the notebook.
activity %>% pull(Id) %>% unique() # 33 ids
mets %>% pull(Id) %>% unique() # 33 ids
rate %>% pull(Id) %>% unique() # 14 ids
bmi %>% pull(Id) %>% unique() # 8 ids
sleep %>% pull(Id) %>% unique() # 24 ids
In [4]:
# Print the column types and a preview of each raw table, in load order.
for (tbl in list(activity, mets, rate, bmi, sleep)) {
  str(tbl)
}
In [5]:
# Count the missing values in each raw table.
mets %>% is.na() %>% sum()
activity %>% is.na() %>% sum()
rate %>% is.na() %>% sum()
bmi %>% is.na() %>% sum() # author's note: this table has NAs (65 recorded)
sleep %>% is.na() %>% sum()
In [6]:
# Clean the weight log: derive height from weight and BMI, keep only the
# calendar date of each entry, and reduce the table to the columns used later.
#
# The original piped into `mutate(bmi, ...)`, which passes the global `bmi`
# data frame as an extra unnamed expression and only worked because dplyr
# splices its columns back in. The data argument is now supplied by the pipe
# alone.
bmi <- bmi %>%
  mutate(
    # BMI = kg / m^2, hence height in metres = sqrt(kg / BMI). Height is
    # computed from the unrounded BMI before BMI itself is rounded.
    height_m = round(sqrt(WeightKg / BMI), digits = 2),
    BMI = round(BMI, digits = 1)
  ) %>%
  rename(date = Date) %>%
  # Keep the first space-delimited token as the date and the second as the
  # time; `extra = "drop"` discards any further tokens without a warning
  # (presumably an AM/PM suffix -- confirm against the CSV).
  separate(date, into = c("date", "Time"), sep = " ", extra = "drop") %>%
  clean_names() %>%
  select(id, date, weight_kg, bmi, height_m)
In [7]:
# Clean the daily activity table: rename the date column, keep only its first
# space-delimited token, normalise column names to snake_case, and drop the
# two distance columns not used in this analysis.
activity <- activity %>%
  rename(date = ActivityDate) %>%
  separate(date, into = c("date"), sep = " ") %>%
  clean_names() %>%
  select(-sedentary_active_distance, -logged_activities_distance)
In [8]:
# Aggregate the per-minute MET readings into one total per participant per day.
mets <- mets %>%
  rename(date = ActivityMinute) %>%
  # Split the timestamp into date and time; `extra = "drop"` discards any
  # further tokens without a warning (presumably an AM/PM suffix -- confirm).
  separate(date, into = c("date", "time"), sep = " ", extra = "drop") %>%
  clean_names() %>%
  # The raw values are divided by 10 (presumably the export stores METs
  # scaled by 10 -- confirm against the data dictionary).
  mutate(mets = me_ts / 10) %>%
  select(-time, -me_ts) %>%
  group_by(id, date) %>%
  # Drop the grouping so later verbs do not silently operate per id.
  summarise(mets = sum(mets), .groups = "drop")
In [9]:
# Aggregate the heart-rate readings into a daily sum and mean per participant.
rate <- rate %>%
  rename(date = Time) %>%
  # Split the timestamp into date and time; `extra = "drop"` discards any
  # further tokens without a warning (presumably an AM/PM suffix -- confirm).
  separate(date, into = c("date", "time"), sep = " ", extra = "drop") %>%
  clean_names() %>%
  select(-time) %>%
  group_by(id, date) %>%
  summarise(
    # Naming the functions gives descriptive `<column>_sum` / `<column>_mean`
    # outputs instead of the auto-generated names an unnamed list produces.
    across(everything(), list(sum = sum, mean = mean)),
    # Drop the grouping so later verbs do not silently operate per id.
    .groups = "drop"
  )
In [10]:
# Clean the daily sleep table and convert the minute totals to hours.
#
# The original piped into `mutate(sleep, ...)`, which passes the global
# `sleep` data frame as an extra unnamed expression; the data argument is now
# supplied by the pipe alone.
sleep <- sleep %>%
  # NOTE: `total_sleep_hour` still holds minutes (it is TotalMinutesAsleep
  # renamed); the hour value is `sleep_hour` below. The name is kept for
  # compatibility with the rest of the notebook.
  rename(date = SleepDay, total_sleep_hour = TotalMinutesAsleep) %>%
  # Split the timestamp into date and time; `extra = "drop"` discards any
  # further tokens without a warning (presumably an AM/PM suffix -- confirm).
  separate(date, into = c("date", "time"), sep = " ", extra = "drop") %>%
  clean_names() %>%
  select(-time) %>%
  mutate(
    sleep_hour = round(total_sleep_hour / 60, digits = 2),
    # Overwrites any existing `time_in_bed` column with hours in bed.
    time_in_bed = round(total_time_in_bed / 60, digits = 2)
  )
In [11]:
# Descriptive statistics for the cleaned daily activity table.
summary(activity)
In [12]:
# Descriptive statistics for the daily MET totals.
summary(mets)
In [13]:
# Descriptive statistics for the daily heart-rate aggregates.
summary(rate)
In [14]:
# Descriptive statistics for the cleaned weight log.
summary(bmi)
In [15]:
# Descriptive statistics for the cleaned sleep table.
summary(sleep)
In [16]:
library(ggplot2)
# Pair each weight-log entry with the sleep record of the same participant on
# the same day. Both tables carry a `date` column; joining on `id` alone would
# match every weight entry against every sleep day of that participant (row
# fan-out) and leave ambiguous `date.x` / `date.y` columns.
bmi_sleep <- merge(bmi, sleep, by = c("id", "date"))
In [17]:
# Scatter plot of hours asleep per day, one point per sleep record.
sleep %>%
  # Parse the date so the x axis is ordered chronologically; as character
  # strings the days sort lexicographically. Assumes month/day/year text such
  # as "4/12/2016" -- TODO confirm against the CSV. Values that are already
  # Date objects pass through unchanged.
  mutate(date = as.Date(date, format = "%m/%d/%Y")) %>%
  ggplot(aes(x = date, y = sleep_hour)) +
  geom_point(aes(color = date)) +
  theme(axis.text.x = element_text(angle = 60)) +
  labs(
    title = "Sleep & Day",
    subtitle = "Daily Sleep Time Distribution",
    x = "Date",
    y = "Sleep Hour",
    # The points map `color`, not `fill`, so the legend title must be set on
    # the colour aesthetic to take effect.
    color = "Date"
  )
In [18]:
# Scatter plot of calories burned against total daily steps, coloured by date,
# with a smoothed trend line over all points.
ggplot(activity, aes(x = total_steps, y = calories)) +
  # Colour is mapped on the point layer only, so the smoother below fits a
  # single curve rather than one per date.
  geom_point(aes(color = date)) +
  geom_smooth() +
  labs(
    title = "Step & Calories",
    subtitle = "Do More Steps Burn More Calories?",
    x = "Total Steps",
    y = "Calories",
    # The points map `color`, not `fill`, so the legend title must be set on
    # the colour aesthetic to take effect.
    color = "Date"
  )
In [19]:
# Scatter plot of very-active minutes against lightly-active minutes; point
# colour and size both encode calories. A smoothed trend line is overlaid.
ggplot(activity, aes(x = lightly_active_minutes, y = very_active_minutes)) +
  # Colour and size are mapped on the point layer only, so the smoother fits
  # a single curve on x and y.
  geom_point(aes(color = calories, size = calories)) +
  geom_smooth() +
  labs(
    # Label typos fixed: "Heightly" / "Hightly" -> "Highly".
    title = "Lightly & Highly Active",
    subtitle = "Differences In Calorie Consumption",
    x = "Lightly Active Min",
    y = "Highly Active Min"
  )
In [20]:
# Box plot of hours asleep for each distinct BMI value; BMI is treated as a
# categorical variable for both the x axis and the fill colour.
bmi_sleep %>%
  ggplot(aes(
    x = as.factor(bmi),
    y = sleep_hour,
    fill = as.factor(bmi)
  )) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(
    title = "sleep time",
    subtitle = "sleep time",
    x = "BMI",
    y = "Sleep Hour",
    fill = "BMI"
  )




 
沒有留言:
張貼留言