# Assignment 2

# Attach the packages this script relies on. Each call has to be its own
# statement; two calls on one line without a separator do not parse.
library(rvest)
library(dplyr)

# IMDb search-result pages to scrape. The two URLs share the same query
# (title_type=feature, num_votes=2500,, country_of_origin=TR) and differ only
# in the release_date window: 1800-12-31..2009-12-31 and
# 2010-01-01..2023-12-31.
data_pages <- c(
  "https://m.imdb.com/search/title/?title_type=feature&release_date=1800-12-31,2009-12-31&num_votes=2500,&country_of_origin=TR",
  "https://m.imdb.com/search/title/?title_type=feature&release_date=2010-01-01,2023-12-31&num_votes=2500,&country_of_origin=TR"
)

#' Scrape one IMDb search-result page into a data frame
#'
#' Downloads `url`, pulls the text of the nodes matched by five CSS selectors
#' and returns them side by side, one column per selector.
#'
#' @param url URL of the page to read (passed to `read_html()`).
#' @return A data.frame with columns `title_names`, `Year`, `Durations`,
#'   `Ratings` and `Votes`, each holding the text of the matched nodes.
#'   Values are returned as scraped; no cleaning or type conversion is done.
#' @details Stops with an error when the selectors do not all match the same
#'   number of nodes, because the columns could then not be aligned row by row.
extract_data <- function(url) {
  page <- read_html(url)

  # Text of every node on the page that matches the CSS selector `css`.
  node_text <- function(css) {
    page %>%
      html_nodes(css) %>%
      html_text()
  }

  fields <- list(
    title_names = node_text(".ipc-title__text"),
    Year = node_text(".sc-43986a27-7.dBkaPT.dli-title-metadata"),
    Durations = node_text(".dli-title-metadata-item:nth-child(2)"),
    # NOTE(review): the rendered source showed an en dash here; restored to
    # the "--" it was presumably typeset from. Confirm against the page markup.
    Ratings = node_text(".ratingGroup--imdb-rating"),
    Votes = node_text(".kRnqtn")
  )

  # data.frame() silently recycles shorter columns when lengths divide evenly,
  # which would pair values with the wrong titles. Fail loudly instead.
  counts <- lengths(fields)
  if (length(unique(counts)) > 1L) {
    stop(
      "Selectors matched different numbers of nodes for ", url, ": ",
      paste0(names(counts), "=", counts, collapse = ", "),
      call. = FALSE
    )
  }

  data.frame(
    title_names = fields$title_names,
    Year = fields$Year,
    Durations = fields$Durations,
    Ratings = fields$Ratings,
    Votes = fields$Votes
  )
}

# Scrape every page listed in `data_pages`, then stack the per-page data
# frames row-wise into a single table.
data_list <- lapply(data_pages, extract_data)
df <- do.call(rbind, data_list)

# Print the combined result.
df

# Back to top