diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000000..f56b8a65594
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+.Rhistory
+.RData
+.Rproj.user/
+.Renviron
+.DS_Store
+.env
+*.log
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000000..a1004d6f77b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,9 @@
+FROM rocker/r-ver:4.3.0
+# rmarkdown::render() needs pandoc, which the r-ver base image does not ship; --error fails the build on a bad install
+RUN /rocker_scripts/install_pandoc.sh && install2.r --error rmarkdown ggplot2 dplyr lattice
+
+WORKDIR /app
+
+COPY . .
+# Knit the report when the container starts; output goes to /app/docs inside the container
+CMD ["Rscript", "-e", "rmarkdown::render('PA1_template.Rmd', output_dir='docs/')"]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000000..53ab1338e77
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Gabriel Lafis
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/PA template files figure-hmtl/unnamed-chunk-11-1.png b/PA template files figure-hmtl/unnamed-chunk-11-1.png
new file mode 100644
index 00000000000..428a2cb80cf
Binary files /dev/null and b/PA template files figure-hmtl/unnamed-chunk-11-1.png differ
diff --git a/PA template files figure-hmtl/unnamed-chunk-15-1.png b/PA template files figure-hmtl/unnamed-chunk-15-1.png
new file mode 100644
index 00000000000..88bac707c64
Binary files /dev/null and b/PA template files figure-hmtl/unnamed-chunk-15-1.png differ
diff --git a/PA template files figure-hmtl/unnamed-chunk-3-1.png b/PA template files figure-hmtl/unnamed-chunk-3-1.png
new file mode 100644
index 00000000000..29b2738fced
Binary files /dev/null and b/PA template files figure-hmtl/unnamed-chunk-3-1.png differ
diff --git a/PA template files figure-hmtl/unnamed-chunk-6-1.png b/PA template files figure-hmtl/unnamed-chunk-6-1.png
new file mode 100644
index 00000000000..dd49d0ffb20
Binary files /dev/null and b/PA template files figure-hmtl/unnamed-chunk-6-1.png differ
diff --git a/PA1_template.Rmd b/PA1_template.Rmd
index d5cc677c93d..220cf7a774b 100644
--- a/PA1_template.Rmd
+++ b/PA1_template.Rmd
@@ -1,25 +1,163 @@
----
-title: "Reproducible Research: Peer Assessment 1"
-output:
- html_document:
- keep_md: true
----
-
-
-## Loading and preprocessing the data
-
-
-
-## What is mean total number of steps taken per day?
-
-
-
-## What is the average daily activity pattern?
-
-
-
-## Imputing missing values
-
-
-
-## Are there differences in activity patterns between weekdays and weekends?
+---
+title: "Reproducible Research: Peer Assessment 1"
+output:
+ html_document:
+ keep_md: true
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+
+# libraries: dplyr for the group_by()/summarise() aggregations, ggplot2 for the final panel plot
+library(dplyr)
+library(ggplot2)
+
+# settings: use the C locale for dates so that weekdays() returns English day names on any machine
+Sys.setlocale("LC_TIME", "C")
+```
+
+## Loading and preprocessing the data
+
+In this first section, I load the data and process the variable date to get a correct format of date.
+
+```{r}
+# Load the raw measurements, then convert the date column to class Date
+activity <- read.csv("activity.csv")
+activity <- transform(activity, date = as.Date(date))
+
+```
+
+## What is mean total number of steps taken per day?
+
+I aggregate the data to determine the total number of steps taken each day. The dplyr package is necessary for this.
+
+```{r}
+# Daily totals: one row per date (a day with any NA step count gets an NA total)
+act_day <- summarise(
+ group_by(activity, date), all_steps = sum(steps)
+)
+```
+
+### Histogram
+
+I use the previous data to plot the histogram of the total number of steps taken each day.
+
+```{r}
+hist(x = act_day$all_steps, xlab = "Steps",
+ main = "Histogram of the total number of steps taken each day",
+ breaks = 10)
+```
+
+### Mean and median of the total number of steps taken per day
+
+Also, I use the previous data to obtain the mean and median of the total number of steps taken per day.
+
+```{r}
+with(act_day, paste("Mean:", round(mean(all_steps, na.rm = TRUE), 2)))
+with(act_day, paste("Median:", median(all_steps, na.rm = TRUE)))
+```
+
+## What is the average daily activity pattern?
+
+First, I obtain the mean number of steps taken per interval, omitting the NA values.
+
+```{r}
+act_interval <- activity %>%
+ group_by(interval) %>%  # one group per 5-minute interval identifier
+ summarise(mean_steps = mean(steps, na.rm = TRUE))  # mean across days, NAs dropped
+```
+
+Now, I can plot the time series.
+
+```{r}
+with(act_interval, plot(interval, mean_steps, type = "l", xlab = "Interval",
+ ylab = "Steps", main = "Mean of number of steps per interval"))
+```
+
+Finally, I find the maximum number of steps and the interval that contains this maximum.
+
+***Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?***
+
+The maximum number of steps (average across all the days in the dataset) is 206.17 and it is taken in the interval 835.
+
+```{r}
+max_act <- max(act_interval[["mean_steps"]])
+filter(act_interval, mean_steps == max_act)
+```
+
+## Imputing missing values
+
+I obtain the total of NAs in the dataset.
+
+```{r}
+paste("Total of NAs in the dataset:", length(which(is.na(activity[["steps"]]))))
+```
+
+To impute the missing values in the dataset, I use the mean of the intervals obtained in the previous section, and then assign this mean to each NA value. Then I get the new dataset without NAs.
+
+```{r}
+# Copy the data and fill each missing step count with the mean of its interval (looked up by name, row order kept)
+data_NA <- activity
+na_rows <- is.na(data_NA$steps)
+data_NA$steps[na_rows] <- act_interval$mean_steps[
+ match(data_NA$interval[na_rows], act_interval$interval)]
+
+```
+
+Now, I obtain the same aggregation for the histogram and the mean and median of the dataset.
+
+```{r}
+# Daily totals again, this time computed from the dataset with imputed values
+act_day_NA <- summarise(
+ group_by(data_NA, date), all_steps = sum(steps)
+)
+```
+
+### Histogram without NAs
+
+```{r}
+hist(act_day_NA$all_steps, breaks = 10, xlab = "Steps",  # label the axis like the first histogram
+ main = "Histogram of the total number of steps taken each day \n (without NAs)")
+```
+
+### Mean and median of the total number of steps taken per day (without NAs)
+
+```{r}
+with(act_day_NA, paste("Mean (without NAs) :", round(mean(all_steps, na.rm = TRUE), 2)))
+with(act_day_NA, paste("Median (without NAs):", median(all_steps, na.rm = TRUE)))
+```
+
+***Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?***
+
+The mean is the same, but the median differs slightly. The mean does not change because the number of missing values is the same for every interval (whole days are missing), so filling them with the mean of each interval changes neither the interval means nor the mean of the daily totals. The median now equals the mean because every imputed day receives that same total.
+
+## Are there differences in activity patterns between weekdays and weekends?
+
+I create a new factor variable with two levels, "Weekday" and "Weekend".
+
+```{r}
+data_NA[["weekday_s"]] <- weekdays(data_NA[["date"]])
+data_NA[["weekday"]] <- as.factor(ifelse(
+ data_NA[["weekday_s"]] == "Saturday" | data_NA[["weekday_s"]] == "Sunday", "Weekend", "Weekday"))
+```
+
+I aggregate the data by interval.
+
+```{r}
+# Mean steps for every (day type, interval) pair, returned ungrouped
+act_day_weekday <- summarise(
+ group_by(data_NA, weekday, interval), mean_steps = mean(steps), .groups = "drop"
+)
+```
+
+Finally, I plot the average number of steps taken per interval, distinguishing between weekdays and weekends.
+
+```{r}
+ggplot(act_day_weekday, aes(x = interval, y = mean_steps)) +
+ geom_line() +
+ facet_wrap(~ weekday, ncol = 1) +  # one panel per day type, stacked vertically
+ theme_minimal() +
+ labs(title = "Average number of steps taken in each interval",  # y is a mean, not a total
+ x = "Interval", y = "Steps") +
+ theme(plot.title = element_text(hjust = 0.5))  # centre the title
+```
diff --git a/PA1_template.html b/PA1_template.html
new file mode 100644
index 00000000000..895c2307a57
--- /dev/null
+++ b/PA1_template.html
@@ -0,0 +1,527 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+Reproducible Research: Peer Assessment 1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Loading and preprocessing the data
+
In this first section, I load the data and process the variable date
+to get a correct format of date.
+
# read data
+activity <- read.csv("activity.csv")
+activity$date <- as.Date(activity$date)
+
+
+
What is mean total number of steps taken per day?
+
I aggregate the data to determine the total number of steps taken
+each day. The dplyr package is necessary for this.
+
# manage data (aggregate per day)
+act_day <- activity %>%
+ group_by(date)%>%
+ summarise(all_steps = sum(steps))
+
+
Histogram
+
I use the previous data to plot the histogram of the total number of
+steps taken each day.
+
hist(act_day$all_steps, breaks = 10,
+ main = "Histogram of the total number of steps taken each day",
+ xlab = "Steps")
+

+
+
+
+
+
What is the average daily activity pattern?
+
First, I obtain the mean of number of steps taken per interval. I
+ommit the NA values.
+
act_interval <- activity %>%
+ group_by(interval)%>%
+ summarise(mean_steps = mean(steps, na.rm = T ))
+
Now, I can plot the time series.
+
plot(act_interval$interval, act_interval$mean_steps, type = "l",
+ main = "Mean of number of steps per interval", xlab = "Interval", ylab = "Steps")
+

+
Finally, I find the maximum number of steps and the interval that
+contains this maximum.
+
Which 5-minute interval, on average across all the days
+in the dataset, contains the maximum number of steps?
+
The maximum number of steps (average across all the days in the
+dataset) is 206.17 and it is taken in the interval 835.
+
max_act <- max(act_interval$mean_steps)
+act_interval[act_interval$mean_steps == max_act, ]
+
## # A tibble: 1 × 2
+## interval mean_steps
+## <int> <dbl>
+## 1 835 206.
+
+
+
Imputing missing values
+
I obtain the total of NAs in the dataset.
+
paste("Total of NAs in the dataset:", sum(is.na(activity$steps)))
+
## [1] "Total of NAs in the dataset: 2304"
+
To impute the missing values in the dataset, I use the mean of the
+intervals obtained in the previous section, and then assign this mean to
+each NA value. Then I get the new dataset without NAs.
+
data_NA <- merge(activity[is.na(activity$steps),], act_interval, by = "interval")
+data_NA$steps <-data_NA$mean_steps
+data_NA <- data_NA[,1:3]
+
+data_NA <- rbind(data_NA,activity[!is.na(activity$steps),] )
+
Now, I obtain the same aggregation for the histogram and the mean and
+median of the dataset.
+
# manage data (aggregate per day)
+act_day_NA <- data_NA %>%
+ group_by(date)%>%
+ summarise(all_steps = sum(steps))
+
+
Histogram without NAs
+
hist(act_day_NA$all_steps, breaks = 10,
+ main = "Histogram of the total number of steps taken each day \n (without NAs)")
+

+
+
+
+
+
Are there differences in activity patterns between weekdays and
+weekends?
+
I calculate the new factor variable (“weekday” and “weekend”),
+
data_NA$weekday_s <- weekdays(data_NA$date)
+data_NA$weekday <- ifelse(data_NA$weekday_s == "Sunday" | data_NA$weekday_s == "Saturday", "Weekend", "Weekday" )
+data_NA$weekday <- as.factor(data_NA$weekday)
+
I aggregate the data by interval.
+
# manage data (aggregate per interval-weekday factor)
+act_day_weekday <- data_NA %>%
+ group_by(weekday,interval)%>%
+ summarise(mean_steps = mean(steps),.groups = "drop")
+
Finnaly, I plot the number of steps taken by interval differencing
+between weekdays and weekends
+
ggplot(act_day_weekday, aes(x = interval, y = mean_steps)) +
+ geom_line() +
+ facet_wrap(~ weekday, ncol = 1) +
+ theme_minimal() +
+ labs(title = "Total number of steps taken each interval",
+ x = "Interval", y = "Steps") +
+ theme( plot.title = element_text(hjust = 0.5) )
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/PA1_template.md b/PA1_template.md
new file mode 100644
index 00000000000..b95c95be740
--- /dev/null
+++ b/PA1_template.md
@@ -0,0 +1,208 @@
+---
+title: "Reproducible Research: Peer Assessment 1"
+output:
+ html_document:
+ keep_md: true
+---
+
+
+
+## Loading and preprocessing the data
+
+In this first section, I load the data and process the variable date to get a correct format of date.
+
+
+``` r
+# read data
+activity <- read.csv("activity.csv")
+activity$date <- as.Date(activity$date)
+```
+
+## What is mean total number of steps taken per day?
+
+I aggregate the data to determine the total number of steps taken each day. The dplyr package is necessary for this.
+
+
+``` r
+# manage data (aggregate per day)
+act_day <- activity %>%
+ group_by(date)%>%
+ summarise(all_steps = sum(steps))
+```
+
+### Histogram
+
+I use the previous data to plot the histogram of the total number of steps taken each day.
+
+
+``` r
+hist(act_day$all_steps, breaks = 10,
+ main = "Histogram of the total number of steps taken each day",
+ xlab = "Steps")
+```
+
+
+
+### Mean and median of the total number of steps taken per day
+
+Also, I use the previous data to obtain the mean and median of the total number of steps taken per day.
+
+
+``` r
+paste("Mean:", round(mean(act_day$all_steps, na.rm = TRUE), 2))
+```
+
+```
+## [1] "Mean: 10766.19"
+```
+
+``` r
+paste("Median:", median(act_day$all_steps, na.rm = TRUE))
+```
+
+```
+## [1] "Median: 10765"
+```
+
+## What is the average daily activity pattern?
+
+First, I obtain the mean number of steps taken per interval, omitting the NA values.
+
+
+``` r
+act_interval <- activity %>%
+ group_by(interval)%>%
+ summarise(mean_steps = mean(steps, na.rm = T ))
+```
+
+Now, I can plot the time series.
+
+
+``` r
+plot(act_interval$interval, act_interval$mean_steps, type = "l",
+ main = "Mean of number of steps per interval", xlab = "Interval", ylab = "Steps")
+```
+
+
+
+Finally, I find the maximum number of steps and the interval that contains this maximum.
+
+***Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?***
+
+The maximum number of steps (average across all the days in the dataset) is 206.17 and it is taken in the interval 835.
+
+
+``` r
+max_act <- max(act_interval$mean_steps)
+act_interval[act_interval$mean_steps == max_act, ]
+```
+
+```
+## # A tibble: 1 × 2
+## interval mean_steps
+##
+## 1 835 206.
+```
+
+## Imputing missing values
+
+I obtain the total of NAs in the dataset.
+
+
+``` r
+paste("Total of NAs in the dataset:", sum(is.na(activity$steps)))
+```
+
+```
+## [1] "Total of NAs in the dataset: 2304"
+```
+
+To impute the missing values in the dataset, I use the mean of the intervals obtained in the previous section, and then assign this mean to each NA value. Then I get the new dataset without NAs.
+
+
+``` r
+data_NA <- merge(activity[is.na(activity$steps),], act_interval, by = "interval")
+data_NA$steps <-data_NA$mean_steps
+data_NA <- data_NA[,1:3]
+
+data_NA <- rbind(data_NA,activity[!is.na(activity$steps),] )
+```
+
+Now, I obtain the same aggregation for the histogram and the mean and median of the dataset.
+
+
+``` r
+# manage data (aggregate per day)
+act_day_NA <- data_NA %>%
+ group_by(date)%>%
+ summarise(all_steps = sum(steps))
+```
+
+### Histogram without NAs
+
+
+``` r
+hist(act_day_NA$all_steps, breaks = 10,
+ main = "Histogram of the total number of steps taken each day \n (without NAs)")
+```
+
+
+
+### Mean and median of the total number of steps taken per day (without NAs)
+
+
+``` r
+paste("Mean (without NAs) :", round(mean(act_day_NA$all_steps, na.rm = TRUE), 2))
+```
+
+```
+## [1] "Mean (without NAs) : 10766.19"
+```
+
+``` r
+paste("Median (without NAs):", median(act_day_NA$all_steps, na.rm = TRUE))
+```
+
+```
+## [1] "Median (without NAs): 10766.1886792453"
+```
+
+***Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?***
+
+The mean is the same, but the median differs slightly. The mean does not change because the number of missing values is the same for every interval (whole days are missing), so filling them with the mean of each interval changes neither the interval means nor the mean of the daily totals. The median now equals the mean because every imputed day receives that same total.
+
+## Are there differences in activity patterns between weekdays and weekends?
+
+I create a new factor variable with two levels, "Weekday" and "Weekend".
+
+
+``` r
+data_NA$weekday_s <- weekdays(data_NA$date)
+data_NA$weekday <- ifelse(data_NA$weekday_s == "Sunday" | data_NA$weekday_s == "Saturday", "Weekend", "Weekday" )
+data_NA$weekday <- as.factor(data_NA$weekday)
+```
+
+I aggregate the data by interval.
+
+
+``` r
+# manage data (aggregate per interval-weekday factor)
+act_day_weekday <- data_NA %>%
+ group_by(weekday,interval)%>%
+ summarise(mean_steps = mean(steps),.groups = "drop")
+```
+
+Finally, I plot the average number of steps taken per interval, distinguishing between weekdays and weekends.
+
+
+``` r
+ggplot(act_day_weekday, aes(x = interval, y = mean_steps)) +
+ geom_line() +
+ facet_wrap(~ weekday, ncol = 1) +
+ theme_minimal() +
+ labs(title = "Total number of steps taken each interval",
+ x = "Interval", y = "Steps") +
+ theme( plot.title = element_text(hjust = 0.5) )
+```
+
+
diff --git a/README.md b/README.md
index 05763414e69..89490bc3199 100644
--- a/README.md
+++ b/README.md
@@ -1,168 +1,95 @@
-## Introduction
+
-It is now possible to collect a large amount of data about personal
-movement using activity monitoring devices such as a
-[Fitbit](http://www.fitbit.com), [Nike
-Fuelband](http://www.nike.com/us/en_us/c/nikeplus-fuelband), or
-[Jawbone Up](https://jawbone.com/up). These type of devices are part of
-the "quantified self" movement -- a group of enthusiasts who take
-measurements about themselves regularly to improve their health, to
-find patterns in their behavior, or because they are tech geeks. But
-these data remain under-utilized both because the raw data are hard to
-obtain and there is a lack of statistical methods and software for
-processing and interpreting the data.
+# Reproducible Research: Activity Monitoring Analysis | Pesquisa Reproduzível: Análise de Monitoramento de Atividade
-This assignment makes use of data from a personal activity monitoring
-device. This device collects data at 5 minute intervals through out the
-day. The data consists of two months of data from an anonymous
-individual collected during the months of October and November, 2012
-and include the number of steps taken in 5 minute intervals each day.
+[](https://www.r-project.org/)
+[](https://rmarkdown.rstudio.com/)
+[](LICENSE)
+[](Dockerfile)
-## Data
+**Reproducible data analysis pipeline for personal activity monitoring device data**
-The data for this assignment can be downloaded from the course web
-site:
+[English](#english) | [Português](#português)
-* Dataset: [Activity monitoring data](https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip) [52K]
+
-The variables included in this dataset are:
+---
-* **steps**: Number of steps taking in a 5-minute interval (missing
- values are coded as `NA`)
+## English
-* **date**: The date on which the measurement was taken in YYYY-MM-DD
- format
+### Overview
-* **interval**: Identifier for the 5-minute interval in which
- measurement was taken
+A reproducible research project analyzing data from personal activity monitoring devices (pedometers/fitness trackers). The analysis processes 17,568 observations across 61 days, addressing missing data imputation, daily activity patterns, and weekday vs weekend behavior comparison using R and RMarkdown.
+### Architecture
+```mermaid
+flowchart LR
+ A[Raw CSV Data] --> B[Data Loading]
+ B --> C[Missing Value Analysis]
+ C --> D[Imputation Strategy]
+ D --> E[Daily Step Totals]
+ D --> F[Interval Patterns]
+ D --> G[Weekday vs Weekend]
+ E --> H[Statistical Summary]
+ F --> H
+ G --> H
+ H --> I[RMarkdown Report]
+```
+### Key Features
-The dataset is stored in a comma-separated-value (CSV) file and there
-are a total of 17,568 observations in this
-dataset.
-
-
-## Assignment
-
-This assignment will be described in multiple parts. You will need to
-write a report that answers the questions detailed below. Ultimately,
-you will need to complete the entire assignment in a **single R
-markdown** document that can be processed by **knitr** and be
-transformed into an HTML file.
-
-Throughout your report make sure you always include the code that you
-used to generate the output you present. When writing code chunks in
-the R markdown document, always use `echo = TRUE` so that someone else
-will be able to read the code. **This assignment will be evaluated via
-peer assessment so it is essential that your peer evaluators be able
-to review the code for your analysis**.
-
-For the plotting aspects of this assignment, feel free to use any
-plotting system in R (i.e., base, lattice, ggplot2)
-
-Fork/clone the [GitHub repository created for this
-assignment](http://github.com/rdpeng/RepData_PeerAssessment1). You
-will submit this assignment by pushing your completed files into your
-forked repository on GitHub. The assignment submission will consist of
-the URL to your GitHub repository and the SHA-1 commit ID for your
-repository state.
-
-NOTE: The GitHub repository also contains the dataset for the
-assignment so you do not have to download the data separately.
-
-
-
-### Loading and preprocessing the data
-
-Show any code that is needed to
-
-1. Load the data (i.e. `read.csv()`)
-
-2. Process/transform the data (if necessary) into a format suitable for your analysis
-
-
-### What is mean total number of steps taken per day?
-
-For this part of the assignment, you can ignore the missing values in
-the dataset.
-
-1. Make a histogram of the total number of steps taken each day
-
-2. Calculate and report the **mean** and **median** total number of steps taken per day
-
-
-### What is the average daily activity pattern?
-
-1. Make a time series plot (i.e. `type = "l"`) of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all days (y-axis)
-
-2. Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?
-
-
-### Imputing missing values
-
-Note that there are a number of days/intervals where there are missing
-values (coded as `NA`). The presence of missing days may introduce
-bias into some calculations or summaries of the data.
-
-1. Calculate and report the total number of missing values in the dataset (i.e. the total number of rows with `NA`s)
-
-2. Devise a strategy for filling in all of the missing values in the dataset. The strategy does not need to be sophisticated. For example, you could use the mean/median for that day, or the mean for that 5-minute interval, etc.
-
-3. Create a new dataset that is equal to the original dataset but with the missing data filled in.
-
-4. Make a histogram of the total number of steps taken each day and Calculate and report the **mean** and **median** total number of steps taken per day. Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?
-
-
-### Are there differences in activity patterns between weekdays and weekends?
-
-For this part the `weekdays()` function may be of some help here. Use
-the dataset with the filled-in missing values for this part.
-
-1. Create a new factor variable in the dataset with two levels -- "weekday" and "weekend" indicating whether a given date is a weekday or weekend day.
-
-1. Make a panel plot containing a time series plot (i.e. `type = "l"`) of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all weekday days or weekend days (y-axis). The plot should look something like the following, which was created using **simulated data**:
+- **Missing Data Handling**: Strategic imputation of 2,304 NA values using interval-based averages
+- **Time Series Analysis**: 5-minute interval activity patterns across 24-hour cycles
+- **Comparative Analysis**: Weekday vs weekend activity pattern differentiation
+- **Reproducible Pipeline**: Fully documented RMarkdown workflow with embedded visualizations
-
+### Industry Applications
+- **Health Tech**: Fitness tracker data analytics for Fitbit, Apple Health, Google Fit platforms
+- **Insurance**: Actuarial analysis using physical activity data for health insurance pricing models
+- **Corporate Wellness**: Employee activity monitoring programs and engagement metrics
+- **Clinical Research**: Patient activity monitoring in pharmaceutical trials and rehabilitation
-**Your plot will look different from the one above** because you will
-be using the activity monitor data. Note that the above plot was made
-using the lattice system but you can make the same version of the plot
-using any plotting system you choose.
+### Quick Start
+```bash
+# With Docker
+docker build -t activity-analysis .
+docker run activity-analysis
-## Submitting the Assignment
+# Local
+Rscript -e "rmarkdown::render('PA1_template.Rmd')"
+```
-To submit the assignment:
+### Fork Context
-1. Commit your completed `PA1_template.Rmd` file to the `master` branch of your git repository (you should already be on the `master` branch unless you created new ones)
+This project builds upon Coursera's Reproducible Research course (Johns Hopkins University), extending the analysis with production-grade documentation and containerization.
-2. Commit your `PA1_template.md` and `PA1_template.html` files produced by processing your R markdown file with the `knit2html()` function in R (from the **knitr** package)
+---
-3. If your document has figures included (it should) then they should have been placed in the `figure/` directory by default (unless you overrode the default). Add and commit the `figure/` directory to your git repository.
+## Português
-4. Push your `master` branch to GitHub.
+### Visão Geral
-5. Submit the URL to your GitHub repository for this assignment on the course web site.
+Projeto de pesquisa reproduzível analisando dados de dispositivos de monitoramento de atividade pessoal. A análise processa 17.568 observações ao longo de 61 dias, abordando imputação de dados ausentes, padrões de atividade diária e comparação entre dias úteis e fins de semana.
-In addition to submitting the URL for your GitHub repository, you will
-need to submit the 40 character SHA-1 hash (as string of numbers from
-0-9 and letters from a-f) that identifies the repository commit that
-contains the version of the files you want to submit. You can do this
-in GitHub by doing the following:
+### Funcionalidades Principais
-1. Go into your GitHub repository web page for this assignment
+- **Tratamento de Dados Ausentes**: Imputação estratégica de 2.304 valores NA usando médias por intervalo
+- **Análise de Séries Temporais**: Padrões de atividade em intervalos de 5 minutos ao longo de 24 horas
+- **Análise Comparativa**: Diferenciação de padrões entre dias úteis e fins de semana
+- **Pipeline Reproduzível**: Workflow RMarkdown completo com visualizações integradas
-2. Click on the "?? commits" link where ?? is the number of commits you have in the repository. For example, if you made a total of 10 commits to this repository, the link should say "10 commits".
+### Aplicações na Indústria
-3. You will see a list of commits that you have made to this repository. The most recent commit is at the very top. If this represents the version of the files you want to submit, then just click the "copy to clipboard" button on the right hand side that should appear when you hover over the SHA-1 hash. Paste this SHA-1 hash into the course web site when you submit your assignment. If you don't want to use the most recent commit, then go down and find the commit you want and copy the SHA-1 hash.
+- **Health Tech**: Analytics de dados de rastreadores fitness para plataformas Fitbit, Apple Health, Google Fit
+- **Seguros**: Análise atuarial usando dados de atividade física para precificação de seguros saúde
+- **Bem-Estar Corporativo**: Programas de monitoramento de atividade de funcionários
+- **Pesquisa Clínica**: Monitoramento de atividade de pacientes em trials farmacêuticos
-A valid submission will look something like (this is just an **example**!)
+---
-```r
-https://github.com/rdpeng/RepData_PeerAssessment1
+## License
-7c376cc5447f11537f8740af8e07d6facc3d9645
-```
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.