# IMDb search-result pages to scrape. Both query feature films with
# country_of_origin=TR and num_votes=2500, sorted by user rating (descending),
# 250 results per page. The first covers release dates 2010-01-01 to
# 2023-12-31, the second everything up to 2009-12-31.
url <- c(
  "https://m.imdb.com/search/title/?title_type=feature&release_date=2010-01-01,2023-12-31&sort=user_rating,desc&num_votes=2500,&country_of_origin=TR&count=250",
  "https://m.imdb.com/search/title/?title_type=feature&release_date=,2009-12-31&sort=user_rating,desc&num_votes=2500,&country_of_origin=TR&count=250"
)
Assignment 2
Assignment 2
(1)
(2)
library(tidyverse)
library(rvest)
library(stringr)
library(knitr)
library(ggplot2)
# Convert a runtime label such as "2h 19m", "2h" or "48m" to minutes.
#
# time_str: a single character string holding the runtime label.
# Returns the total runtime in minutes as a numeric scalar.
convert_time <- function(time_str) {
  hours <- 0
  minutes <- 0
  if (grepl("h", time_str)) {
    # Splitting on both unit letters yields c(hours, minutes); the minutes
    # piece is absent for labels like "2h", so minutes stays 0.
    time_components <- strsplit(time_str, "h|m")[[1]]
    if (length(time_components) >= 1) {
      hours <- as.numeric(time_components[1])
    }
    if (length(time_components) >= 2) {
      minutes <- as.numeric(time_components[2])
    }
  } else {
    # No hour part: the whole label is a minute count such as "48m".
    minutes <- as.numeric(gsub("m", "", time_str))
  }
  hours * 60 + minutes
}
# Scrape the first results page (url[1]) into parallel vectors, one entry per
# listed movie.
data_html <- read_html(url[1])

# Titles: drop the first and last ".ipc-title__text" nodes (presumably page
# headings rather than movies; verify against the live markup), then keep
# only the text after the first space, which removes the leading list prefix.
title_names_1 <- data_html |>
  html_nodes(".ipc-title__text") |>
  html_text()
title_names_1 <- tail(head(title_names_1, -1), -1)
title_names_1 <- str_split(title_names_1, " ", n = 2)
title_names_1 <- unlist(lapply(title_names_1, function(x) x[2]))

# Release year: first metadata item of each entry.
year_1 <- data_html |>
  html_nodes(".dli-title-metadata-item:nth-child(1)") |>
  html_text() |>
  as.numeric()

# Runtime: second metadata item, converted from its text label to minutes.
duration_1 <- data_html |>
  html_nodes(".dli-title-metadata-item:nth-child(2)") |>
  html_text()
duration_1 <- unlist(lapply(duration_1, convert_time))

# Rating: first decimal number found in the rating group's text.
rating_1 <- data_html |>
  html_nodes(".ratingGroup--imdb-rating") |>
  html_text()
rating_1 <- as.numeric(str_extract(rating_1, "\\d+\\.\\d+"))

# Vote count: strip every non-digit character before converting.
# NOTE(review): ".kRnqtn" looks like a generated class name and may change
# when the site is redeployed; confirm it still matches.
votes_1 <- data_html |>
  html_nodes(".kRnqtn") |>
  html_text()
votes_1 <- as.numeric(gsub("\\D", "", votes_1))
# Scrape the second results page (url[2]) into parallel vectors, one entry
# per listed movie. Note that data_html is overwritten with this page.
data_html <- read_html(url[2])

# Titles: drop the first and last ".ipc-title__text" nodes (presumably page
# headings rather than movies; verify against the live markup), then keep
# only the text after the first space, which removes the leading list prefix.
title_names_2 <- data_html |>
  html_nodes(".ipc-title__text") |>
  html_text()
title_names_2 <- tail(head(title_names_2, -1), -1)
title_names_2 <- str_split(title_names_2, " ", n = 2)
title_names_2 <- unlist(lapply(title_names_2, function(x) x[2]))

# Release year: first metadata item of each entry.
year_2 <- data_html |>
  html_nodes(".dli-title-metadata-item:nth-child(1)") |>
  html_text() |>
  as.numeric()

# Runtime: second metadata item, converted from its text label to minutes.
duration_2 <- data_html |>
  html_nodes(".dli-title-metadata-item:nth-child(2)") |>
  html_text()
duration_2 <- unlist(lapply(duration_2, convert_time))

# Rating: first decimal number found in the rating group's text.
rating_2 <- data_html |>
  html_nodes(".ratingGroup--imdb-rating") |>
  html_text()
rating_2 <- as.numeric(str_extract(rating_2, "\\d+\\.\\d+"))

# Vote count: strip every non-digit character before converting.
# NOTE(review): ".kRnqtn" looks like a generated class name and may change
# when the site is redeployed; confirm it still matches.
votes_2 <- data_html |>
  html_nodes(".kRnqtn") |>
  html_text()
votes_2 <- as.numeric(gsub("\\D", "", votes_2))
# Concatenate the per-page vectors (page 1 first, then page 2) and assemble
# them into one data frame with one row per movie. data.frame() stops with an
# error if the scraped vectors ended up with different lengths.
Title <- c(title_names_1, title_names_2)
Year <- c(year_1, year_2)
Duration <- c(duration_1, duration_2)
Rating <- c(rating_1, rating_2)
Votes <- c(votes_1, votes_2)

movie_data <- data.frame(
  Title = Title,
  Year = Year,
  Duration = Duration,
  Rating = Rating,
  Votes = Votes
)
(3)
(a)
# Rank every movie from highest to lowest rating; `movies` is reused by the
# tables further down.
movies <- movie_data %>%
  arrange(desc(Rating))

# Show the five best-rated movies.
kable(
  movies %>%
    head(5),
  caption = "Top 5 Movies",
  col.names = c("Title", "Year", "Duration", "Rating", "Votes")
)
Title | Year | Duration | Rating | Votes |
---|---|---|---|---|
Hababam Sinifi | 1975 | 87 | 9.2 | 42512 |
CM101MMXI Fundamentals | 2013 | 139 | 9.1 | 46996 |
Tosun Pasa | 1976 | 90 | 8.9 | 24329 |
Hababam Sinifi Sinifta Kaldi | 1975 | 95 | 8.9 | 24369 |
Süt Kardesler | 1976 | 80 | 8.8 | 20889 |
I have watched all 5 movies on this list. Of course, I would order some of them differently if I were making the ranking myself, but I am happy with this list (especially the first 2 rows).
# Show the five worst-rated movies: `movies` is sorted by descending rating,
# so they are its last five rows. tail() keeps the original row names, which
# kable then prints as an extra leading column.
kable(
  movies %>%
    tail(5),
  caption = "Bottom 5 Movies",
  col.names = c("Title", "Year", "Duration", "Rating", "Votes")
)
| | Title | Year | Duration | Rating | Votes |
---|---|---|---|---|---|
466 | Cumali Ceber 2 | 2018 | 100 | 1.2 | 10230 |
467 | Müjde | 2022 | 48 | 1.2 | 9919 |
468 | 15/07 Safak Vakti | 2021 | 95 | 1.2 | 20608 |
469 | Cumali Ceber: Allah Seni Alsin | 2017 | 100 | 1.0 | 39269 |
470 | Reis | 2017 | 108 | 1.0 | 73974 |
I respect the opinions of the voters and have not watched any of the movies on this list. :)
(b)
# Show the rows for three hand-picked titles, in the order they appear in
# movie_data.
kable(
  movie_data %>%
    filter(Title %in% c("CM101MMXI Fundamentals", "Hababam Sinifi", "Masumiyet")),
  caption = "My Best Three",
  col.names = c("Title", "Year", "Duration", "Rating", "Votes")
)
Title | Year | Duration | Rating | Votes |
---|---|---|---|---|
CM101MMXI Fundamentals | 2013 | 139 | 9.1 | 46996 |
Hababam Sinifi | 1975 | 87 | 9.2 | 42512 |
Masumiyet | 1997 | 110 | 8.1 | 19295 |
(c)
# Scatter plot of the mean rating per release year.
movie_data %>%
  group_by(Year) %>%
  summarize(yearly_average = mean(Rating)) %>%
  ggplot(aes(x = Year, y = yearly_average)) +
  geom_point() +
  ggtitle("Yearly Rating Averages")
As you can see, average movie ratings have been decreasing over the years. The causes are complex, but in short, my opinion is that revenue has increasingly been prioritized over quality as the years go by.
# Bar chart of how many movies in movie_data fall in each release year.
# Year is treated as a factor so every year gets its own bar, and the x-axis
# labels are rotated 90 degrees.
ggplot(data = movie_data, mapping = aes(x = factor(Year))) +
  geom_bar(colour = "black", fill = "blue") +
  labs(
    title = "Number of Movies Over the Years",
    x = "Year",
    y = "Number of Movies"
  ) +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none"
  )
Similar to my previous comment, the reason for the increase in the number of films over the years may be the fact that many short-lived films are made for the sake of revenue.
# Convert Year to a factor so each year gets its own box. This modifies
# movie_data in place: from here on Year is a factor, so any later numeric
# conversion must go through as.character() to recover the actual years.
movie_data$Year <- as.factor(movie_data$Year)

# One box plot of ratings per release year; the fill legend is hidden and the
# x-axis labels are rotated 90 degrees.
ggplot(movie_data, aes(x = Year, y = Rating, fill = factor(Year))) +
  geom_boxplot(color = "black", width = 0.5) +
  labs(x = "Year", y = "Rating") +
  ggtitle("Box Plots of Movie Ratings Over the Years") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90)
  )
(d)
# Pearson correlation (cor()'s default method) between vote count and rating.
corr <- cor(movie_data$Votes, movie_data$Rating)
cat("Correlation between Votes and Ratings:", corr)
Correlation between Votes and Ratings: 0.130875
(e)
# Pearson correlation (cor()'s default method) between runtime and rating.
# The printed label names Duration, matching the columns actually correlated.
corr <- cor(movie_data$Duration, movie_data$Rating)
cat("Correlation between Duration and Ratings:", corr)
Correlation between Votes and Ratings: 0.03356006
(4)
# Keep a copy of movie_data in its current state for the comparison table
# built later.
movie_data1 <- movie_data

# Results page for Turkish feature films in the groups=top_1000 list, sorted
# by release date (ascending). This overwrites the earlier `url` and
# `data_html` objects.
url <- c("https://m.imdb.com/search/title/?title_type=feature&sort=release_date,asc&num_votes=2500,&groups=top_1000&country_of_origin=TR&count=250")
data_html <- read_html(url)

# Titles: drop the first and last ".ipc-title__text" nodes (presumably page
# headings rather than movies; verify against the live markup), then keep
# only the text after the first space, which removes the leading list prefix.
title_names <- data_html |>
  html_nodes(".ipc-title__text") |>
  html_text()
title_names <- tail(head(title_names, -1), -1)
title_names <- str_split(title_names, " ", n = 2)
title_names <- unlist(lapply(title_names, function(x) x[2]))

# Release year: first metadata item of each entry.
year <- data_html |>
  html_nodes(".dli-title-metadata-item:nth-child(1)") |>
  html_text() |>
  as.numeric()
# Title/year pairs scraped from the top-1000 results page.
top_movies <- data.frame(
  Title = title_names,
  Year = year
)

kable(
  top_movies,
  caption = "Turkish Movies in the Top 1000"
)
Title | Year |
---|---|
Eskiya | 1996 |
Her Sey Çok Güzel Olacak | 1998 |
Vizontele | 2001 |
G.O.R.A. | 2004 |
Babam ve Oglum | 2005 |
Nefes: Vatan Sagolsun | 2009 |
Bir Zamanlar Anadolu’da | 2011 |
Kis Uykusu | 2014 |
Ayla: The Daughter of War | 2017 |
Ahlat Agaci | 2018 |
Yedinci Kogustaki Mucize | 2019 |
# Make Year numeric on both sides. movie_data$Year may be a factor at this
# point (it is converted with as.factor() for the box plot), and as.numeric()
# on a factor returns the internal level codes rather than the years, so go
# through as.character() first. This is a no-op for columns that are already
# numeric.
movie_data <- movie_data %>%
  mutate(Year = as.numeric(as.character(Year)))
top_movies <- top_movies %>%
  mutate(Year = as.numeric(as.character(Year)))

# Attach duration, rating and votes to the top-1000 titles by matching on
# title and year against the rating-sorted `movies` table, best rated first.
movies_top2 <- movies %>%
  inner_join(top_movies, by = c("Title", "Year")) %>%
  arrange(desc(Rating))
# Render the joined top-1000 table with display column headers.
kable(
  x = movies_top2,
  col.names = c("Title", "Year", "Duration", "Rating", "Votes"),
  caption = "Turkish Movies in the Top 1000 Full Table"
)
Title | Year | Duration | Rating | Votes |
---|---|---|---|---|
Ayla: The Daughter of War | 2017 | 125 | 8.3 | 42992 |
Yedinci Kogustaki Mucize | 2019 | 132 | 8.2 | 54171 |
Babam ve Oglum | 2005 | 108 | 8.2 | 91035 |
Eskiya | 1996 | 128 | 8.1 | 71704 |
Her Sey Çok Güzel Olacak | 1998 | 107 | 8.1 | 27122 |
Kis Uykusu | 2014 | 196 | 8.0 | 54646 |
Ahlat Agaci | 2018 | 188 | 8.0 | 27015 |
Nefes: Vatan Sagolsun | 2009 | 128 | 8.0 | 35022 |
G.O.R.A. | 2004 | 127 | 8.0 | 66033 |
Vizontele | 2001 | 110 | 8.0 | 38403 |
Bir Zamanlar Anadolu’da | 2011 | 157 | 7.8 | 49365 |
# Sort the saved copy of the full data set by descending rating and show its
# first 11 rows, for comparison with the 11 titles in the top-1000 table.
movie_data1 <- arrange(movie_data1, desc(Rating))

kable(
  head(movie_data1, 11),
  caption = "Turkish Movies in the Top 11",
  col.names = c("Title", "Year", "Duration", "Rating", "Votes")
)
Title | Year | Duration | Rating | Votes |
---|---|---|---|---|
Hababam Sinifi | 1975 | 87 | 9.2 | 42512 |
CM101MMXI Fundamentals | 2013 | 139 | 9.1 | 46996 |
Tosun Pasa | 1976 | 90 | 8.9 | 24329 |
Hababam Sinifi Sinifta Kaldi | 1975 | 95 | 8.9 | 24369 |
Süt Kardesler | 1976 | 80 | 8.8 | 20889 |
Saban Oglu Saban | 1977 | 90 | 8.7 | 18534 |
Zügürt Aga | 1985 | 101 | 8.7 | 16135 |
Neseli Günler | 1978 | 95 | 8.7 | 11807 |
Kibar Feyzo | 1978 | 83 | 8.7 | 17126 |
Hababam Sinifi Uyaniyor | 1976 | 94 | 8.7 | 20640 |
Canim Kardesim | 1973 | 85 | 8.6 | 10097 |