Assignment 2

Assignment 2

PART 2

first_url_df <- url_to_df(urls[1])

Code
library(tidyverse) # for everything :)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
library(rvest) # for HTML scraping

Attaching package: 'rvest'

The following object is masked from 'package:readr':

    guess_encoding
Code
library(stringr) # for string processing

# IMDb advanced-search result pages to scrape (query strings as given):
#   [1] release_date=2010-01-01,2023-12-31
#   [2] release_date=,2009-12-31
# Both use title_type=feature, num_votes=2500, country_of_origin=TR, count=250.
urls <- c(
  "https://m.imdb.com/search/title/?title_type=feature&release_date=2010-01-01,2023-12-31&num_votes=2500,&country_of_origin=TR&count=250",
  "https://m.imdb.com/search/title/?title_type=feature&release_date=,2009-12-31&num_votes=2500,&country_of_origin=TR&count=250"
)

#' Convert a runtime string such as "1h 31m", "2h" or "48m" to total minutes.
#'
#' Despite its historical name, this function returns MINUTES, not seconds;
#' the name is kept so existing callers continue to work.
#'
#' The hour and minute components are located by their unit suffix ("h" and
#' "m") instead of guessing from the magnitude of the first number. The
#' previous implementation compared the first token to 4 as a *string*, so
#' minutes-only values such as "30m" were treated as hours, and runtimes of
#' four hours or more lost their hour component.
#'
#' @param period_ A character vector of runtimes (a single string also works).
#' @return A double vector the same length as `period_` holding
#'   `60 * hours + minutes`. Elements with neither an "h" nor an "m"
#'   component (including `NA`) yield `NA`.
my_period_to_seconds <- function(period_) {
  period_ <- as.character(period_)

  # Pull out the integer that immediately precedes `unit`; NA where absent.
  extract_unit <- function(unit) {
    hit <- regexpr(paste0("[0-9]+[[:space:]]*", unit), period_)
    found <- !is.na(hit) & hit > 0
    amount <- rep(NA_real_, length(period_))
    # regmatches() returns only the matched elements, in order, so it lines
    # up with the `found` mask; strip the trailing unit before converting.
    amount[found] <- as.numeric(
      sub("[^0-9]+$", "", regmatches(period_, hit))
    )
    amount
  }

  hours <- extract_unit("h")
  minutes <- extract_unit("m")

  # Nothing recognisable at all -> NA rather than a silent zero.
  unparsed <- is.na(hours) & is.na(minutes)
  hours[is.na(hours)] <- 0
  minutes[is.na(minutes)] <- 0

  total <- 60 * hours + minutes
  total[unparsed] <- NA_real_
  total
}
  
# Scrape one IMDb search-results page into a data frame.
#
# Arguments:
#   url  Address of the search-results page, passed to read_html().
#
# Returns the column-bound data frame of: title_names, as.integer.years.,
# as.integer.durations_minutes., rating and votes_numerical.
#
# Side effects: prints the scraped years, durations, the 199th duration,
# the converted durations and the parsed vote counts as diagnostics.
#
# NOTE(review): the ".sc-..." CSS classes look auto-generated and may change
# whenever the site is redeployed -- confirm the selectors before relying on
# this function.
url_to_df <- function(url) {
  page <- read_html(url)

  # Titles: each heading is split once on the first space and the second
  # piece is kept (i.e. whatever precedes the first space is dropped).
  raw_titles <- page |> html_nodes('a h3') |> html_text()
  title_pieces <- str_split(raw_titles, " ", n = 2)
  titles <- unlist(lapply(title_pieces, `[`, 2))
  titles_df <- data.frame(title_names = titles)

  # Release years: first metadata item of every entry.
  years <- html_text(
    html_nodes(page, ".sc-43986a27-8.jHYIIK.dli-title-metadata-item:nth-child(1)")
  )
  print(years)
  years_df <- data.frame(as.integer.years. = as.integer(years))

  # Runtimes: second metadata item, converted element by element.
  durations <- html_text(
    html_nodes(page, ".sc-43986a27-8.jHYIIK.dli-title-metadata-item:nth-child(2)")
  )
  print(durations)
  print(durations[199])

  durations_minutes <- lapply(durations, my_period_to_seconds)
  print(durations_minutes)
  durations_df <- data.frame(
    as.integer.durations_minutes. = as.integer(durations_minutes)
  )

  # Ratings: the first three characters of the rating text are glued back
  # together and parsed as a number.
  rating_text <- page |>
    html_nodes(".sc-9ab53865-1.iXEijC.ratingGroup--imdb-rating") |>
    html_text()
  rating_chars <- lapply(strsplit(rating_text, ""), function(chars) chars[1:3])
  rating_strings <- lapply(
    rating_chars,
    function(chars) paste0(chars[1], chars[2], chars[3])
  )
  rating_df <- data.frame(rating = as.numeric(rating_strings))

  # Vote counts: strip the "Votes" label, then parse with "," as the
  # thousands separator.
  votes_text <- html_text(html_nodes(page, ".sc-53c98e73-0.kRnqtn"))
  votes_numerical <- parse_number(
    str_replace_all(votes_text, "Votes", ""),
    locale = locale(grouping_mark = ",")
  )
  print(votes_numerical)
  votes_df <- data.frame(votes_numerical = votes_numerical)

  return(bind_cols(titles_df, years_df, durations_df, rating_df, votes_df))
}



# Scrape the first search-results page (urls[1]) into a data frame.
first_url_df <- url_to_df(urls[1])
  [1] "2023" "2023" "2019" "2023" "2023" "2018" "2014" "2016" "2023" "2017"
 [11] "2022" "2018" "2011" "2013" "2015" "2016" "2023" "2015" "2014" "2022"
 [21] "2023" "2012" "2023" "2019" "2012" "2017" "2015" "2023" "2019" "2016"
 [31] "2023" "2014" "2022" "2018" "2020" "2016" "2012" "2012" "2013" "2017"
 [41] "2021" "2013" "2018" "2014" "2022" "2016" "2019" "2011" "2015" "2022"
 [51] "2015" "2019" "2023" "2014" "2017" "2010" "2018" "2022" "2021" "2013"
 [61] "2018" "2019" "2011" "2014" "2022" "2014" "2015" "2019" "2017" "2013"
 [71] "2013" "2021" "2020" "2015" "2014" "2021" "2018" "2017" "2019" "2013"
 [81] "2018" "2022" "2017" "2022" "2017" "2019" "2012" "2011" "2011" "2019"
 [91] "2016" "2019" "2015" "2017" "2010" "2011" "2021" "2018" "2015" "2015"
[101] "2017" "2021" "2018" "2016" "2019" "2022" "2014" "2020" "2013" "2018"
[111] "2020" "2016" "2021" "2018" "2017" "2018" "2011" "2017" "2014" "2016"
[121] "2017" "2014" "2013" "2013" "2016" "2020" "2018" "2013" "2021" "2014"
[131] "2017" "2016" "2012" "2016" "2020" "2018" "2010" "2020" "2013" "2022"
[141] "2019" "2010" "2010" "2015" "2020" "2020" "2015" "2017" "2019" "2017"
[151] "2015" "2010" "2020" "2019" "2015" "2015" "2014" "2018" "2016" "2014"
[161] "2015" "2016" "2014" "2016" "2012" "2012" "2019" "2010" "2011" "2013"
[171] "2018" "2010" "2022" "2014" "2014" "2010" "2012" "2015" "2011" "2015"
[181] "2017" "2010" "2011" "2013" "2010" "2011" "2018" "2017" "2018" "2017"
[191] "2017" "2017" "2017" "2019" "2018" "2018" "2012" "2013" "2022" "2016"
[201] "2016" "2015" "2012" "2011" "2011" "2016" "2015" "2011" "2013" "2010"
[211] "2018" "2014" "2010" "2011" "2015" "2012" "2013" "2010" "2016" "2017"
[221] "2016" "2014" "2011" "2017" "2019" "2014" "2015" "2018" "2013" "2018"
[231] "2016" "2011" "2014" "2011" "2015" "2015" "2014" "2017" "2010" "2012"
[241] "2020" "2015" "2017" "2016" "2021" "2018" "2015" "2018"
  [1] "3h 17m" "1h 31m" "2h 12m" "1h 57m" "1h 53m" "1h 47m" "3h 16m" "2h 15m"
  [9] "1h 54m" "2h 5m"  "2h 9m"  "3h 8m"  "2h 37m" "2h 14m" "1h 37m" "1h 19m"
 [17] "1h 44m" "2h 16m" "1h 56m" "2h 25m" "1h 52m" "1h 30m" "2h 4m"  "1h 52m"
 [25] "2h 45m" "1h 46m" "1h 50m" "1h 45m" "2h 9m"  "1h 26m" "1h 37m" "1h 36m"
 [33] "1h 37m" "1h 49m" "1h 41m" "1h 45m" "1h 47m" "1h 59m" "2h 19m" "2h 4m" 
 [41] "1h 36m" "2h 18m" "2h 10m" "1h 45m" "2h 31m" "2h 6m"  "2h 21m" "1h 45m"
 [49] "1h 44m" "2h 22m" "1h 52m" "2h 1m"  "2h"     "2h 13m" "1h 55m" "2h 20m"
 [57] "1h 57m" "1h 33m" "1h 41m" "1h 47m" "2h 16m" "1h 55m" "1h 34m" "1h 58m"
 [65] "2h 13m" "2h 14m" "1h 57m" "1h 49m" "1h 45m" "1h 46m" "1h 53m" "1h 36m"
 [73] "2h 1m"  "1h 33m" "2h 5m"  "1h 25m" "1h 53m" "1h 48m" "1h 46m" "1h 54m"
 [81] "2h 1m"  "1h 44m" "1h 53m" "1h 52m" "1h 40m" "1h 39m" "1h 38m" "1h 50m"
 [89] "1h 58m" "2h 6m"  "2h"     "1h 48m" "1h 57m" "1h 44m" "1h 59m" "1h 32m"
 [97] "1h 45m" "1h 36m" "2h 40m" "1h 52m" "1h 40m" "1h 55m" "1h 35m" "2h"    
[105] "1h"     "1h 42m" "1h 50m" "1h 58m" "1h 40m" "1h 52m" "1h"     "1h 44m"
[113] "1h 53m" "2h 5m"  "1h 50m" "1h 35m" "2h 6m"  "1h 55m" "1h 46m" "1h 49m"
[121] "1h 46m" "1h 46m" "1h 37m" "1h 32m" "1h 55m" "1h 52m" "1h 50m" "1h 44m"
[129] "2h 4m"  "1h 45m" "1h 50m" "1h 41m" "1h 31m" "1h 54m" "1h"     "1h 54m"
[137] "1h 44m" "1h 30m" "1h 39m" "1h 56m" "1h"     "1h 35m" "1h 40m" "1h 53m"
[145] "1h 56m" "1h 54m" "1h 48m" "1h 44m" "1h 46m" "1h 48m" "1h 55m" "1h 54m"
[153] "2h 4m"  "2h"     "1h 54m" "1h 59m" "1h 43m" "1h 51m" "1h 50m" "1h 57m"
[161] "1h 43m" "1h 52m" "1h 45m" "1h 47m" "1h 36m" "1h 46m" "1h 50m" "1h 38m"
[169] "1h 30m" "1h 41m" "1h 46m" "1h 42m" "1h 59m" "1h 38m" "1h 42m" "1h 43m"
[177] "2h 5m"  "2h 6m"  "1h 46m" "1h 45m" "1h 43m" "1h 50m" "1h 30m" "1h 45m"
[185] "1h 45m" "1h 45m" "1h 53m" "2h 5m"  "1h 40m" "2h 30m" "1h 54m" "1h 54m"
[193] "1h 35m" "2h 7m"  "1h 45m" "1h 35m" "1h 34m" "1h 45m" "48m"    "1h 40m"
[201] "1h 54m" "1h 37m" "1h 41m" "1h 45m" "2h"     "1h 33m" "1h 39m" "2h 43m"
[209] "1h 37m" "1h 42m" "1h 56m" "1h 47m" "1h 44m" "1h 30m" "1h 58m" "1h 38m"
[217] "1h 40m" "1h 36m" "1h 49m" "1h 48m" "1h 48m" "2h 6m"  "1h 44m" "1h 30m"
[225] "1h 30m" "1h 49m" "1h 58m" "1h 42m" "1h 33m" "2h"     "1h 47m" "1h 42m"
[233] "1h 49m" "1h 51m" "1h 46m" "1h 33m" "1h 36m" "1h 56m" "1h 35m" "1h 41m"
[241] "1h 10m" "1h 54m" "1h 13m" "1h 28m" "1h 35m" "1h 36m" "1h 31m" "56m"   
[1] "48m"
[1] "3 17"
[1] "3"  "17"
[1] "1 31"
[1] "1"  "31"
[1] "2 12"
[1] "2"  "12"
[1] "1 57"
[1] "1"  "57"
[1] "1 53"
[1] "1"  "53"
[1] "1 47"
[1] "1"  "47"
[1] "3 16"
[1] "3"  "16"
[1] "2 15"
[1] "2"  "15"
[1] "1 54"
[1] "1"  "54"
[1] "2 5"
[1] "2" "5"
[1] "2 9"
[1] "2" "9"
[1] "3 8"
[1] "3" "8"
[1] "2 37"
[1] "2"  "37"
[1] "2 14"
[1] "2"  "14"
[1] "1 37"
[1] "1"  "37"
[1] "1 19"
[1] "1"  "19"
[1] "1 44"
[1] "1"  "44"
[1] "2 16"
[1] "2"  "16"
[1] "1 56"
[1] "1"  "56"
[1] "2 25"
[1] "2"  "25"
[1] "1 52"
[1] "1"  "52"
[1] "1 30"
[1] "1"  "30"
[1] "2 4"
[1] "2" "4"
[1] "1 52"
[1] "1"  "52"
[1] "2 45"
[1] "2"  "45"
[1] "1 46"
[1] "1"  "46"
[1] "1 50"
[1] "1"  "50"
[1] "1 45"
[1] "1"  "45"
[1] "2 9"
[1] "2" "9"
[1] "1 26"
[1] "1"  "26"
[1] "1 37"
[1] "1"  "37"
[1] "1 36"
[1] "1"  "36"
[1] "1 37"
[1] "1"  "37"
[1] "1 49"
[1] "1"  "49"
[1] "1 41"
[1] "1"  "41"
[1] "1 45"
[1] "1"  "45"
[1] "1 47"
[1] "1"  "47"
[1] "1 59"
[1] "1"  "59"
[1] "2 19"
[1] "2"  "19"
[1] "2 4"
[1] "2" "4"
[1] "1 36"
[1] "1"  "36"
[1] "2 18"
[1] "2"  "18"
[1] "2 10"
[1] "2"  "10"
[1] "1 45"
[1] "1"  "45"
[1] "2 31"
[1] "2"  "31"
[1] "2 6"
[1] "2" "6"
[1] "2 21"
[1] "2"  "21"
[1] "1 45"
[1] "1"  "45"
[1] "1 44"
[1] "1"  "44"
[1] "2 22"
[1] "2"  "22"
[1] "1 52"
[1] "1"  "52"
[1] "2 1"
[1] "2" "1"
[1] "2"
[1] "2"
[1] "2 13"
[1] "2"  "13"
[1] "1 55"
[1] "1"  "55"
[1] "2 20"
[1] "2"  "20"
[1] "1 57"
[1] "1"  "57"
[1] "1 33"
[1] "1"  "33"
[1] "1 41"
[1] "1"  "41"
[1] "1 47"
[1] "1"  "47"
[1] "2 16"
[1] "2"  "16"
[1] "1 55"
[1] "1"  "55"
[1] "1 34"
[1] "1"  "34"
[1] "1 58"
[1] "1"  "58"
[1] "2 13"
[1] "2"  "13"
[1] "2 14"
[1] "2"  "14"
[1] "1 57"
[1] "1"  "57"
[1] "1 49"
[1] "1"  "49"
[1] "1 45"
[1] "1"  "45"
[1] "1 46"
[1] "1"  "46"
[1] "1 53"
[1] "1"  "53"
[1] "1 36"
[1] "1"  "36"
[1] "2 1"
[1] "2" "1"
[1] "1 33"
[1] "1"  "33"
[1] "2 5"
[1] "2" "5"
[1] "1 25"
[1] "1"  "25"
[1] "1 53"
[1] "1"  "53"
[1] "1 48"
[1] "1"  "48"
[1] "1 46"
[1] "1"  "46"
[1] "1 54"
[1] "1"  "54"
[1] "2 1"
[1] "2" "1"
[1] "1 44"
[1] "1"  "44"
[1] "1 53"
[1] "1"  "53"
[1] "1 52"
[1] "1"  "52"
[1] "1 40"
[1] "1"  "40"
[1] "1 39"
[1] "1"  "39"
[1] "1 38"
[1] "1"  "38"
[1] "1 50"
[1] "1"  "50"
[1] "1 58"
[1] "1"  "58"
[1] "2 6"
[1] "2" "6"
[1] "2"
[1] "2"
[1] "1 48"
[1] "1"  "48"
[1] "1 57"
[1] "1"  "57"
[1] "1 44"
[1] "1"  "44"
[1] "1 59"
[1] "1"  "59"
[1] "1 32"
[1] "1"  "32"
[1] "1 45"
[1] "1"  "45"
[1] "1 36"
[1] "1"  "36"
[1] "2 40"
[1] "2"  "40"
[1] "1 52"
[1] "1"  "52"
[1] "1 40"
[1] "1"  "40"
[1] "1 55"
[1] "1"  "55"
[1] "1 35"
[1] "1"  "35"
[1] "2"
[1] "2"
[1] "1"
[1] "1"
[1] "1 42"
[1] "1"  "42"
[1] "1 50"
[1] "1"  "50"
[1] "1 58"
[1] "1"  "58"
[1] "1 40"
[1] "1"  "40"
[1] "1 52"
[1] "1"  "52"
[1] "1"
[1] "1"
[1] "1 44"
[1] "1"  "44"
[1] "1 53"
[1] "1"  "53"
[1] "2 5"
[1] "2" "5"
[1] "1 50"
[1] "1"  "50"
[1] "1 35"
[1] "1"  "35"
[1] "2 6"
[1] "2" "6"
[1] "1 55"
[1] "1"  "55"
[1] "1 46"
[1] "1"  "46"
[1] "1 49"
[1] "1"  "49"
[1] "1 46"
[1] "1"  "46"
[1] "1 46"
[1] "1"  "46"
[1] "1 37"
[1] "1"  "37"
[1] "1 32"
[1] "1"  "32"
[1] "1 55"
[1] "1"  "55"
[1] "1 52"
[1] "1"  "52"
[1] "1 50"
[1] "1"  "50"
[1] "1 44"
[1] "1"  "44"
[1] "2 4"
[1] "2" "4"
[1] "1 45"
[1] "1"  "45"
[1] "1 50"
[1] "1"  "50"
[1] "1 41"
[1] "1"  "41"
[1] "1 31"
[1] "1"  "31"
[1] "1 54"
[1] "1"  "54"
[1] "1"
[1] "1"
[1] "1 54"
[1] "1"  "54"
[1] "1 44"
[1] "1"  "44"
[1] "1 30"
[1] "1"  "30"
[1] "1 39"
[1] "1"  "39"
[1] "1 56"
[1] "1"  "56"
[1] "1"
[1] "1"
[1] "1 35"
[1] "1"  "35"
[1] "1 40"
[1] "1"  "40"
[1] "1 53"
[1] "1"  "53"
[1] "1 56"
[1] "1"  "56"
[1] "1 54"
[1] "1"  "54"
[1] "1 48"
[1] "1"  "48"
[1] "1 44"
[1] "1"  "44"
[1] "1 46"
[1] "1"  "46"
[1] "1 48"
[1] "1"  "48"
[1] "1 55"
[1] "1"  "55"
[1] "1 54"
[1] "1"  "54"
[1] "2 4"
[1] "2" "4"
[1] "2"
[1] "2"
[1] "1 54"
[1] "1"  "54"
[1] "1 59"
[1] "1"  "59"
[1] "1 43"
[1] "1"  "43"
[1] "1 51"
[1] "1"  "51"
[1] "1 50"
[1] "1"  "50"
[1] "1 57"
[1] "1"  "57"
[1] "1 43"
[1] "1"  "43"
[1] "1 52"
[1] "1"  "52"
[1] "1 45"
[1] "1"  "45"
[1] "1 47"
[1] "1"  "47"
[1] "1 36"
[1] "1"  "36"
[1] "1 46"
[1] "1"  "46"
[1] "1 50"
[1] "1"  "50"
[1] "1 38"
[1] "1"  "38"
[1] "1 30"
[1] "1"  "30"
[1] "1 41"
[1] "1"  "41"
[1] "1 46"
[1] "1"  "46"
[1] "1 42"
[1] "1"  "42"
[1] "1 59"
[1] "1"  "59"
[1] "1 38"
[1] "1"  "38"
[1] "1 42"
[1] "1"  "42"
[1] "1 43"
[1] "1"  "43"
[1] "2 5"
[1] "2" "5"
[1] "2 6"
[1] "2" "6"
[1] "1 46"
[1] "1"  "46"
[1] "1 45"
[1] "1"  "45"
[1] "1 43"
[1] "1"  "43"
[1] "1 50"
[1] "1"  "50"
[1] "1 30"
[1] "1"  "30"
[1] "1 45"
[1] "1"  "45"
[1] "1 45"
[1] "1"  "45"
[1] "1 45"
[1] "1"  "45"
[1] "1 53"
[1] "1"  "53"
[1] "2 5"
[1] "2" "5"
[1] "1 40"
[1] "1"  "40"
[1] "2 30"
[1] "2"  "30"
[1] "1 54"
[1] "1"  "54"
[1] "1 54"
[1] "1"  "54"
[1] "1 35"
[1] "1"  "35"
[1] "2 7"
[1] "2" "7"
[1] "1 45"
[1] "1"  "45"
[1] "1 35"
[1] "1"  "35"
[1] "1 34"
[1] "1"  "34"
[1] "1 45"
[1] "1"  "45"
[1] "48"
[1] "48"
[1] "1 40"
[1] "1"  "40"
[1] "1 54"
[1] "1"  "54"
[1] "1 37"
[1] "1"  "37"
[1] "1 41"
[1] "1"  "41"
[1] "1 45"
[1] "1"  "45"
[1] "2"
[1] "2"
[1] "1 33"
[1] "1"  "33"
[1] "1 39"
[1] "1"  "39"
[1] "2 43"
[1] "2"  "43"
[1] "1 37"
[1] "1"  "37"
[1] "1 42"
[1] "1"  "42"
[1] "1 56"
[1] "1"  "56"
[1] "1 47"
[1] "1"  "47"
[1] "1 44"
[1] "1"  "44"
[1] "1 30"
[1] "1"  "30"
[1] "1 58"
[1] "1"  "58"
[1] "1 38"
[1] "1"  "38"
[1] "1 40"
[1] "1"  "40"
[1] "1 36"
[1] "1"  "36"
[1] "1 49"
[1] "1"  "49"
[1] "1 48"
[1] "1"  "48"
[1] "1 48"
[1] "1"  "48"
[1] "2 6"
[1] "2" "6"
[1] "1 44"
[1] "1"  "44"
[1] "1 30"
[1] "1"  "30"
[1] "1 30"
[1] "1"  "30"
[1] "1 49"
[1] "1"  "49"
[1] "1 58"
[1] "1"  "58"
[1] "1 42"
[1] "1"  "42"
[1] "1 33"
[1] "1"  "33"
[1] "2"
[1] "2"
[1] "1 47"
[1] "1"  "47"
[1] "1 42"
[1] "1"  "42"
[1] "1 49"
[1] "1"  "49"
[1] "1 51"
[1] "1"  "51"
[1] "1 46"
[1] "1"  "46"
[1] "1 33"
[1] "1"  "33"
[1] "1 36"
[1] "1"  "36"
[1] "1 56"
[1] "1"  "56"
[1] "1 35"
[1] "1"  "35"
[1] "1 41"
[1] "1"  "41"
[1] "1 10"
[1] "1"  "10"
[1] "1 54"
[1] "1"  "54"
[1] "1 13"
[1] "1"  "13"
[1] "1 28"
[1] "1"  "28"
[1] "1 35"
[1] "1"  "35"
[1] "1 36"
[1] "1"  "36"
[1] "1 31"
[1] "1"  "31"
[1] "56"
[1] "56"
[[1]]
[1] 197

[[2]]
[1] 91

[[3]]
[1] 132

[[4]]
[1] 117

[[5]]
[1] 113

[[6]]
[1] 107

[[7]]
[1] 196

[[8]]
[1] 135

[[9]]
[1] 114

[[10]]
[1] 125

[[11]]
[1] 129

[[12]]
[1] 188

[[13]]
[1] 157

[[14]]
[1] 134

[[15]]
[1] 97

[[16]]
[1] 79

[[17]]
[1] 104

[[18]]
[1] 136

[[19]]
[1] 116

[[20]]
[1] 145

[[21]]
[1] 112

[[22]]
[1] 90

[[23]]
[1] 124

[[24]]
[1] 112

[[25]]
[1] 165

[[26]]
[1] 106

[[27]]
[1] 110

[[28]]
[1] 105

[[29]]
[1] 129

[[30]]
[1] 86

[[31]]
[1] 97

[[32]]
[1] 96

[[33]]
[1] 97

[[34]]
[1] 109

[[35]]
[1] 101

[[36]]
[1] 105

[[37]]
[1] 107

[[38]]
[1] 119

[[39]]
[1] 139

[[40]]
[1] 124

[[41]]
[1] 96

[[42]]
[1] 138

[[43]]
[1] 130

[[44]]
[1] 105

[[45]]
[1] 151

[[46]]
[1] 126

[[47]]
[1] 141

[[48]]
[1] 105

[[49]]
[1] 104

[[50]]
[1] 142

[[51]]
[1] 112

[[52]]
[1] 121

[[53]]
[1] 120

[[54]]
[1] 133

[[55]]
[1] 115

[[56]]
[1] 140

[[57]]
[1] 117

[[58]]
[1] 93

[[59]]
[1] 101

[[60]]
[1] 107

[[61]]
[1] 136

[[62]]
[1] 115

[[63]]
[1] 94

[[64]]
[1] 118

[[65]]
[1] 133

[[66]]
[1] 134

[[67]]
[1] 117

[[68]]
[1] 109

[[69]]
[1] 105

[[70]]
[1] 106

[[71]]
[1] 113

[[72]]
[1] 96

[[73]]
[1] 121

[[74]]
[1] 93

[[75]]
[1] 125

[[76]]
[1] 85

[[77]]
[1] 113

[[78]]
[1] 108

[[79]]
[1] 106

[[80]]
[1] 114

[[81]]
[1] 121

[[82]]
[1] 104

[[83]]
[1] 113

[[84]]
[1] 112

[[85]]
[1] 100

[[86]]
[1] 99

[[87]]
[1] 98

[[88]]
[1] 110

[[89]]
[1] 118

[[90]]
[1] 126

[[91]]
[1] 120

[[92]]
[1] 108

[[93]]
[1] 117

[[94]]
[1] 104

[[95]]
[1] 119

[[96]]
[1] 92

[[97]]
[1] 105

[[98]]
[1] 96

[[99]]
[1] 160

[[100]]
[1] 112

[[101]]
[1] 100

[[102]]
[1] 115

[[103]]
[1] 95

[[104]]
[1] 120

[[105]]
[1] 60

[[106]]
[1] 102

[[107]]
[1] 110

[[108]]
[1] 118

[[109]]
[1] 100

[[110]]
[1] 112

[[111]]
[1] 60

[[112]]
[1] 104

[[113]]
[1] 113

[[114]]
[1] 125

[[115]]
[1] 110

[[116]]
[1] 95

[[117]]
[1] 126

[[118]]
[1] 115

[[119]]
[1] 106

[[120]]
[1] 109

[[121]]
[1] 106

[[122]]
[1] 106

[[123]]
[1] 97

[[124]]
[1] 92

[[125]]
[1] 115

[[126]]
[1] 112

[[127]]
[1] 110

[[128]]
[1] 104

[[129]]
[1] 124

[[130]]
[1] 105

[[131]]
[1] 110

[[132]]
[1] 101

[[133]]
[1] 91

[[134]]
[1] 114

[[135]]
[1] 60

[[136]]
[1] 114

[[137]]
[1] 104

[[138]]
[1] 90

[[139]]
[1] 99

[[140]]
[1] 116

[[141]]
[1] 60

[[142]]
[1] 95

[[143]]
[1] 100

[[144]]
[1] 113

[[145]]
[1] 116

[[146]]
[1] 114

[[147]]
[1] 108

[[148]]
[1] 104

[[149]]
[1] 106

[[150]]
[1] 108

[[151]]
[1] 115

[[152]]
[1] 114

[[153]]
[1] 124

[[154]]
[1] 120

[[155]]
[1] 114

[[156]]
[1] 119

[[157]]
[1] 103

[[158]]
[1] 111

[[159]]
[1] 110

[[160]]
[1] 117

[[161]]
[1] 103

[[162]]
[1] 112

[[163]]
[1] 105

[[164]]
[1] 107

[[165]]
[1] 96

[[166]]
[1] 106

[[167]]
[1] 110

[[168]]
[1] 98

[[169]]
[1] 90

[[170]]
[1] 101

[[171]]
[1] 106

[[172]]
[1] 102

[[173]]
[1] 119

[[174]]
[1] 98

[[175]]
[1] 102

[[176]]
[1] 103

[[177]]
[1] 125

[[178]]
[1] 126

[[179]]
[1] 106

[[180]]
[1] 105

[[181]]
[1] 103

[[182]]
[1] 110

[[183]]
[1] 90

[[184]]
[1] 105

[[185]]
[1] 105

[[186]]
[1] 105

[[187]]
[1] 113

[[188]]
[1] 125

[[189]]
[1] 100

[[190]]
[1] 150

[[191]]
[1] 114

[[192]]
[1] 114

[[193]]
[1] 95

[[194]]
[1] 127

[[195]]
[1] 105

[[196]]
[1] 95

[[197]]
[1] 94

[[198]]
[1] 105

[[199]]
[1] 48

[[200]]
[1] 100

[[201]]
[1] 114

[[202]]
[1] 97

[[203]]
[1] 101

[[204]]
[1] 105

[[205]]
[1] 120

[[206]]
[1] 93

[[207]]
[1] 99

[[208]]
[1] 163

[[209]]
[1] 97

[[210]]
[1] 102

[[211]]
[1] 116

[[212]]
[1] 107

[[213]]
[1] 104

[[214]]
[1] 90

[[215]]
[1] 118

[[216]]
[1] 98

[[217]]
[1] 100

[[218]]
[1] 96

[[219]]
[1] 109

[[220]]
[1] 108

[[221]]
[1] 108

[[222]]
[1] 126

[[223]]
[1] 104

[[224]]
[1] 90

[[225]]
[1] 90

[[226]]
[1] 109

[[227]]
[1] 118

[[228]]
[1] 102

[[229]]
[1] 93

[[230]]
[1] 120

[[231]]
[1] 107

[[232]]
[1] 102

[[233]]
[1] 109

[[234]]
[1] 111

[[235]]
[1] 106

[[236]]
[1] 93

[[237]]
[1] 96

[[238]]
[1] 116

[[239]]
[1] 95

[[240]]
[1] 101

[[241]]
[1] 70

[[242]]
[1] 114

[[243]]
[1] 73

[[244]]
[1] 88

[[245]]
[1] 95

[[246]]
[1] 96

[[247]]
[1] 91

[[248]]
[1] 56

  [1]   5067   7352  54156   3451   3345  30260  54633 109866   8769  42990
 [11]  11155  27003  49354   7107  12104  15604   2579  13894   3505   5863
 [21]   4479  23118   5154  19217  58914  26555  14322   4752   2779   8439
 [31]   3084   5034   5821   3639   2514   2840  10077   4207  46995  25658
 [41]  13413  23758  13032   5935   5617   7282  13650  25127  13131   4414
 [51]   2721  15825   6789   4275   4367  36261  15774   2588   3384  10484
 [61]  19953   7410  14115  12907   6367  35451  12120   6287   3332  23515
 [71]   4878  11268   5807   3705   4067   2951   4993   5756   3101  43232
 [81]   3487   3720   4707   4008   2787   9513   6500  10398  19477   6391
 [91]   4131   4656   3399   5508  15443   7946   4786   2957   3183  11162
[101]  39267   3594   2876   5009  10466   2992  13674   5004   5735   3777
[111]   9136   4547   5487  38568   5366   9428  11216   7477  10858   3471
[121]   2612   3729   6857   5485  15305   7868  55886   2617   3090   3273
[131]   6768   3196   6094   7829   7648   3943   5160   5415   9179   5262
[141]  13410  20793   2598   3848   3538   4197   5425   3121   3848  73973
[151]   3525   7479   3209   5219  20979   4319   4591   4649   3017  18081
[161]  16479   2632   7918   3658   2833   2708   8626   5456  13539   5093
[171]   2828   7297   2695   3474   3046   7420   6091   4162  15370   9587
[181]   5162   6051   3153   5181   5507   2558   7925   2945  10228   4279
[191]   2668  17578   3567   3129   3609   3046   3887   3570   9920   3938
[201]   6776  18160   5438   9531   5635   4863   3922   3596   5418   5193
[211]   3724   4859  21438   3423   3255   7195   2650   4540   3044   4455
[221]   3460   4128   2840   6956   4770   4831   3933   2568   3512   2861
[231]   2966   4015   7139   3629   3452   3568   4303   4859   5345   3950
[241]   2670  27603   3297   2657  20607   2640   3701   2565
Code
# Scrape the second search-results page (urls[2]) into a data frame.
second_url_df <- url_to_df(urls[2])
  [1] "2009" "2005" "1997" "2006" "2002" "1996" "2008" "1965" "1975" "2008"
 [11] "2007" "2004" "2007" "2001" "2008" "2006" "1982" "2009" "2006" "2006"
 [21] "2001" "1998" "2005" "1982" "1976" "2003" "1997" "1996" "2005" "1998"
 [31] "1977" "2001" "2006" "1999" "2004" "1977" "1999" "2001" "2009" "1963"
 [41] "2008" "2008" "1997" "2009" "2006" "2009" "1977" "2007" "2009" "2009"
 [51] "2008" "2009" "2009" "1994" "2005" "2009" "1986" "2006" "1985" "2008"
 [61] "1981" "1987" "2005" "1983" "1976" "1974" "1983" "2004" "1978" "1987"
 [71] "1980" "2009" "1976" "2008" "1978" "1978" "2006" "2009" "1999" "1984"
 [81] "1999" "2007" "1993" "1990" "2006" "2007" "2004" "2008" "1975" "1972"
 [91] "1968" "2007" "1993" "2005" "1973" "2000" "2004" "2007" "2009" "2006"
[101] "2006" "2007" "1970" "1989" "2003" "1979" "1975" "1983" "2005" "1999"
[111] "2008" "2009" "1977" "2003" "1989" "1974" "1996" "1976" "2007" "2008"
[121] "1990" "1973" "2004" "1975" "1980" "2004" "2006" "2008" "2006" "1985"
[131] "2008" "1976" "1982" "1976" "1977" "2008" "2008" "2007" "2005" "1986"
[141] "2007" "1975" "1981" "2009" "1983" "1981" "1999" "2009" "2006" "1983"
[151] "1981" "1980" "1990" "2000" "2004" "1988" "2006" "2006" "2005" "2008"
[161] "1988" "1986" "1987" "1978" "1986" "1977" "1974" "2004" "1990" "1983"
[171] "2000" "2008" "2003" "1984" "2005" "1991" "1978" "1983" "2008" "1983"
[181] "1985" "1978" "1978" "1984" "1975" "1984" "1987" "1984" "1985" "1979"
[191] "2008" "1978" "1980" "2008" "2007" "2000" "1989" "1986" "1981" "1975"
[201] "1978" "1987" "1974" "2005" "1987" "1988" "1985" "1985" "1982" "1976"
[211] "1979" "1984" "2009" "1979" "2008" "1979" "1979" "1979" "1986" "1978"
[221] "2009" "1977"
  [1] "2h 8m"  "1h 48m" "1h 50m" "1h 43m" "1h 50m" "2h 8m"  "2h 7m"  "1h 26m"
  [9] "1h 27m" "1h 49m" "1h 32m" "2h 7m"  "2h 20m" "1h 59m" "1h 53m" "2h 2m" 
 [17] "1h 47m" "1h 52m" "1h 50m" "1h 37m" "1h 50m" "1h 47m" "1h 39m" "1h 31m"
 [25] "1h 30m" "1h 32m" "2h"     "1h 15m" "1h 46m" "1h 42m" "1h 30m" "1h 40m"
 [33] "1h 50m" "1h 32m" "2h 8m"  "1h 30m" "2h 10m" "2h 3m"  "1h 38m" "1h 30m"
 [41] "1h 30m" "1h 29m" "1h 25m" "1h 25m" "2h"     "2h"     "1h 20m" "1h 37m"
 [49] "1h 38m" "1h 36m" "1h 30m" "1h 40m" "1h 47m" "1h 32m" "2h 22m" "1h 37m"
 [57] "1h 40m" "2h 2m"  "1h 41m" "1h 39m" "1h 28m" "1h 59m" "1h 39m" "1h 57m"
 [65] "1h 15m" "1h 27m" "1h 26m" "1h 37m" "1h 35m" "1h 41m" "1h 28m" "2h 2m" 
 [73] "1h 20m" "1h 54m" "1h 23m" "1h 57m" "1h 45m" "2h 2m"  "1h 37m" "1h 33m"
 [81] "2h"     "1h 45m" "1h 43m" "1h 35m" "1h 46m" "1h 40m" "1h 42m" "2h"    
 [89] "1h 35m" "1h 34m" "1h 29m" "1h 45m" "1h 30m" "1h 55m" "1h 25m" "2h"    
 [97] "1h 59m" "1h 35m" "1h 52m" "1h 30m" "2h 15m" "1h 55m" "1h 40m" "1h 25m"
[105] "1h 51m" "1h 17m" "1h 30m" "1h 23m" "1h 29m" "1h 25m" "1h 42m" "1h 40m"
[113] "1h 37m" "1h 53m" "1h 32m" "1h 31m" "1h 59m" "1h 35m" "1h 44m" "2h 10m"
[121] "1h 46m" "1h 12m" "1h 40m" "1h 16m" "1h 25m" "1h 35m" "1h 51m" "1h 56m"
[129] "1h 45m" "1h 14m" "2h"     "1h 34m" "1h 27m" "1h 24m" "1h 37m" "2h 1m" 
[137] "1h 52m" "1h 53m" "1h 38m" "1h 17m" "2h 6m"  "1h 32m" "1h 21m" "1h 50m"
[145] "1h 27m" "1h 34m" "1h 40m" "1h 33m" "1h 40m" "1h 24m" "1h 21m" "1h 19m"
[153] "1h 31m" "1h 45m" "2h 8m"  "1h 31m" "2h 17m" "1h 37m" "1h 52m" "1h 40m"
[161] "1h 29m" "1h 27m" "1h 19m" "1h 28m" "1h 36m" "1h 21m" "1h 24m" "1h 58m"
[169] "1h 25m" "1h 30m" "1h 55m" "1h 39m" "2h 5m"  "1h 33m" "1h 47m" "1h 33m"
[177] "1h 28m" "1h 25m" "1h 21m" "1h 30m" "1h 32m" "1h 11m" "1h 19m" "1h 27m"
[185] "1h 17m" "1h 33m" "1h 34m" "1h 22m" "1h 25m" "1h 23m" "1h 49m" "1h 22m"
[193] "1h 16m" "1h 39m" "1h 40m" "1h 53m" "1h 32m" "1h 23m" "1h 21m" "1h 18m"
[201] "1h 26m" "1h 36m" "1h 26m" "1h 36m" "1h 29m" "1h 47m" "1h 27m" "1h 33m"
[209] "1h 30m" "1h 17m" "1h 22m" "1h 23m" "1h 30m" "1h 23m" "1h 27m" "1h 7m" 
[217] "1h 29m" "1h 19m" "1h 34m" "1h 19m" "1h 53m" "1h 28m"
[1] "1h 21m"
[1] "2 8"
[1] "2" "8"
[1] "1 48"
[1] "1"  "48"
[1] "1 50"
[1] "1"  "50"
[1] "1 43"
[1] "1"  "43"
[1] "1 50"
[1] "1"  "50"
[1] "2 8"
[1] "2" "8"
[1] "2 7"
[1] "2" "7"
[1] "1 26"
[1] "1"  "26"
[1] "1 27"
[1] "1"  "27"
[1] "1 49"
[1] "1"  "49"
[1] "1 32"
[1] "1"  "32"
[1] "2 7"
[1] "2" "7"
[1] "2 20"
[1] "2"  "20"
[1] "1 59"
[1] "1"  "59"
[1] "1 53"
[1] "1"  "53"
[1] "2 2"
[1] "2" "2"
[1] "1 47"
[1] "1"  "47"
[1] "1 52"
[1] "1"  "52"
[1] "1 50"
[1] "1"  "50"
[1] "1 37"
[1] "1"  "37"
[1] "1 50"
[1] "1"  "50"
[1] "1 47"
[1] "1"  "47"
[1] "1 39"
[1] "1"  "39"
[1] "1 31"
[1] "1"  "31"
[1] "1 30"
[1] "1"  "30"
[1] "1 32"
[1] "1"  "32"
[1] "2"
[1] "2"
[1] "1 15"
[1] "1"  "15"
[1] "1 46"
[1] "1"  "46"
[1] "1 42"
[1] "1"  "42"
[1] "1 30"
[1] "1"  "30"
[1] "1 40"
[1] "1"  "40"
[1] "1 50"
[1] "1"  "50"
[1] "1 32"
[1] "1"  "32"
[1] "2 8"
[1] "2" "8"
[1] "1 30"
[1] "1"  "30"
[1] "2 10"
[1] "2"  "10"
[1] "2 3"
[1] "2" "3"
[1] "1 38"
[1] "1"  "38"
[1] "1 30"
[1] "1"  "30"
[1] "1 30"
[1] "1"  "30"
[1] "1 29"
[1] "1"  "29"
[1] "1 25"
[1] "1"  "25"
[1] "1 25"
[1] "1"  "25"
[1] "2"
[1] "2"
[1] "2"
[1] "2"
[1] "1 20"
[1] "1"  "20"
[1] "1 37"
[1] "1"  "37"
[1] "1 38"
[1] "1"  "38"
[1] "1 36"
[1] "1"  "36"
[1] "1 30"
[1] "1"  "30"
[1] "1 40"
[1] "1"  "40"
[1] "1 47"
[1] "1"  "47"
[1] "1 32"
[1] "1"  "32"
[1] "2 22"
[1] "2"  "22"
[1] "1 37"
[1] "1"  "37"
[1] "1 40"
[1] "1"  "40"
[1] "2 2"
[1] "2" "2"
[1] "1 41"
[1] "1"  "41"
[1] "1 39"
[1] "1"  "39"
[1] "1 28"
[1] "1"  "28"
[1] "1 59"
[1] "1"  "59"
[1] "1 39"
[1] "1"  "39"
[1] "1 57"
[1] "1"  "57"
[1] "1 15"
[1] "1"  "15"
[1] "1 27"
[1] "1"  "27"
[1] "1 26"
[1] "1"  "26"
[1] "1 37"
[1] "1"  "37"
[1] "1 35"
[1] "1"  "35"
[1] "1 41"
[1] "1"  "41"
[1] "1 28"
[1] "1"  "28"
[1] "2 2"
[1] "2" "2"
[1] "1 20"
[1] "1"  "20"
[1] "1 54"
[1] "1"  "54"
[1] "1 23"
[1] "1"  "23"
[1] "1 57"
[1] "1"  "57"
[1] "1 45"
[1] "1"  "45"
[1] "2 2"
[1] "2" "2"
[1] "1 37"
[1] "1"  "37"
[1] "1 33"
[1] "1"  "33"
[1] "2"
[1] "2"
[1] "1 45"
[1] "1"  "45"
[1] "1 43"
[1] "1"  "43"
[1] "1 35"
[1] "1"  "35"
[1] "1 46"
[1] "1"  "46"
[1] "1 40"
[1] "1"  "40"
[1] "1 42"
[1] "1"  "42"
[1] "2"
[1] "2"
[1] "1 35"
[1] "1"  "35"
[1] "1 34"
[1] "1"  "34"
[1] "1 29"
[1] "1"  "29"
[1] "1 45"
[1] "1"  "45"
[1] "1 30"
[1] "1"  "30"
[1] "1 55"
[1] "1"  "55"
[1] "1 25"
[1] "1"  "25"
[1] "2"
[1] "2"
[1] "1 59"
[1] "1"  "59"
[1] "1 35"
[1] "1"  "35"
[1] "1 52"
[1] "1"  "52"
[1] "1 30"
[1] "1"  "30"
[1] "2 15"
[1] "2"  "15"
[1] "1 55"
[1] "1"  "55"
[1] "1 40"
[1] "1"  "40"
[1] "1 25"
[1] "1"  "25"
[1] "1 51"
[1] "1"  "51"
[1] "1 17"
[1] "1"  "17"
[1] "1 30"
[1] "1"  "30"
[1] "1 23"
[1] "1"  "23"
[1] "1 29"
[1] "1"  "29"
[1] "1 25"
[1] "1"  "25"
[1] "1 42"
[1] "1"  "42"
[1] "1 40"
[1] "1"  "40"
[1] "1 37"
[1] "1"  "37"
[1] "1 53"
[1] "1"  "53"
[1] "1 32"
[1] "1"  "32"
[1] "1 31"
[1] "1"  "31"
[1] "1 59"
[1] "1"  "59"
[1] "1 35"
[1] "1"  "35"
[1] "1 44"
[1] "1"  "44"
[1] "2 10"
[1] "2"  "10"
[1] "1 46"
[1] "1"  "46"
[1] "1 12"
[1] "1"  "12"
[1] "1 40"
[1] "1"  "40"
[1] "1 16"
[1] "1"  "16"
[1] "1 25"
[1] "1"  "25"
[1] "1 35"
[1] "1"  "35"
[1] "1 51"
[1] "1"  "51"
[1] "1 56"
[1] "1"  "56"
[1] "1 45"
[1] "1"  "45"
[1] "1 14"
[1] "1"  "14"
[1] "2"
[1] "2"
[1] "1 34"
[1] "1"  "34"
[1] "1 27"
[1] "1"  "27"
[1] "1 24"
[1] "1"  "24"
[1] "1 37"
[1] "1"  "37"
[1] "2 1"
[1] "2" "1"
[1] "1 52"
[1] "1"  "52"
[1] "1 53"
[1] "1"  "53"
[1] "1 38"
[1] "1"  "38"
[1] "1 17"
[1] "1"  "17"
[1] "2 6"
[1] "2" "6"
[1] "1 32"
[1] "1"  "32"
[1] "1 21"
[1] "1"  "21"
[1] "1 50"
[1] "1"  "50"
[1] "1 27"
[1] "1"  "27"
[1] "1 34"
[1] "1"  "34"
[1] "1 40"
[1] "1"  "40"
[1] "1 33"
[1] "1"  "33"
[1] "1 40"
[1] "1"  "40"
[1] "1 24"
[1] "1"  "24"
[1] "1 21"
[1] "1"  "21"
[1] "1 19"
[1] "1"  "19"
[1] "1 31"
[1] "1"  "31"
[1] "1 45"
[1] "1"  "45"
[1] "2 8"
[1] "2" "8"
[1] "1 31"
[1] "1"  "31"
[1] "2 17"
[1] "2"  "17"
[1] "1 37"
[1] "1"  "37"
[1] "1 52"
[1] "1"  "52"
[1] "1 40"
[1] "1"  "40"
[1] "1 29"
[1] "1"  "29"
[1] "1 27"
[1] "1"  "27"
[1] "1 19"
[1] "1"  "19"
[1] "1 28"
[1] "1"  "28"
[1] "1 36"
[1] "1"  "36"
[1] "1 21"
[1] "1"  "21"
[1] "1 24"
[1] "1"  "24"
[1] "1 58"
[1] "1"  "58"
[1] "1 25"
[1] "1"  "25"
[1] "1 30"
[1] "1"  "30"
[1] "1 55"
[1] "1"  "55"
[1] "1 39"
[1] "1"  "39"
[1] "2 5"
[1] "2" "5"
[1] "1 33"
[1] "1"  "33"
[1] "1 47"
[1] "1"  "47"
[1] "1 33"
[1] "1"  "33"
[1] "1 28"
[1] "1"  "28"
[1] "1 25"
[1] "1"  "25"
[1] "1 21"
[1] "1"  "21"
[1] "1 30"
[1] "1"  "30"
[1] "1 32"
[1] "1"  "32"
[1] "1 11"
[1] "1"  "11"
[1] "1 19"
[1] "1"  "19"
[1] "1 27"
[1] "1"  "27"
[1] "1 17"
[1] "1"  "17"
[1] "1 33"
[1] "1"  "33"
[1] "1 34"
[1] "1"  "34"
[1] "1 22"
[1] "1"  "22"
[1] "1 25"
[1] "1"  "25"
[1] "1 23"
[1] "1"  "23"
[1] "1 49"
[1] "1"  "49"
[1] "1 22"
[1] "1"  "22"
[1] "1 16"
[1] "1"  "16"
[1] "1 39"
[1] "1"  "39"
[1] "1 40"
[1] "1"  "40"
[1] "1 53"
[1] "1"  "53"
[1] "1 32"
[1] "1"  "32"
[1] "1 23"
[1] "1"  "23"
[1] "1 21"
[1] "1"  "21"
[1] "1 18"
[1] "1"  "18"
[1] "1 26"
[1] "1"  "26"
[1] "1 36"
[1] "1"  "36"
[1] "1 26"
[1] "1"  "26"
[1] "1 36"
[1] "1"  "36"
[1] "1 29"
[1] "1"  "29"
[1] "1 47"
[1] "1"  "47"
[1] "1 27"
[1] "1"  "27"
[1] "1 33"
[1] "1"  "33"
[1] "1 30"
[1] "1"  "30"
[1] "1 17"
[1] "1"  "17"
[1] "1 22"
[1] "1"  "22"
[1] "1 23"
[1] "1"  "23"
[1] "1 30"
[1] "1"  "30"
[1] "1 23"
[1] "1"  "23"
[1] "1 27"
[1] "1"  "27"
[1] "1 7"
[1] "1" "7"
[1] "1 29"
[1] "1"  "29"
[1] "1 19"
[1] "1"  "19"
[1] "1 34"
[1] "1"  "34"
[1] "1 19"
[1] "1"  "19"
[1] "1 53"
[1] "1"  "53"
[1] "1 28"
[1] "1"  "28"
[[1]]
[1] 128

[[2]]
[1] 108

[[3]]
[1] 110

[[4]]
[1] 103

[[5]]
[1] 110

[[6]]
[1] 128

[[7]]
[1] 127

[[8]]
[1] 86

[[9]]
[1] 87

[[10]]
[1] 109

[[11]]
[1] 92

[[12]]
[1] 127

[[13]]
[1] 140

[[14]]
[1] 119

[[15]]
[1] 113

[[16]]
[1] 122

[[17]]
[1] 107

[[18]]
[1] 112

[[19]]
[1] 110

[[20]]
[1] 97

[[21]]
[1] 110

[[22]]
[1] 107

[[23]]
[1] 99

[[24]]
[1] 91

[[25]]
[1] 90

[[26]]
[1] 92

[[27]]
[1] 120

[[28]]
[1] 75

[[29]]
[1] 106

[[30]]
[1] 102

[[31]]
[1] 90

[[32]]
[1] 100

[[33]]
[1] 110

[[34]]
[1] 92

[[35]]
[1] 128

[[36]]
[1] 90

[[37]]
[1] 130

[[38]]
[1] 123

[[39]]
[1] 98

[[40]]
[1] 90

[[41]]
[1] 90

[[42]]
[1] 89

[[43]]
[1] 85

[[44]]
[1] 85

[[45]]
[1] 120

[[46]]
[1] 120

[[47]]
[1] 80

[[48]]
[1] 97

[[49]]
[1] 98

[[50]]
[1] 96

[[51]]
[1] 90

[[52]]
[1] 100

[[53]]
[1] 107

[[54]]
[1] 92

[[55]]
[1] 142

[[56]]
[1] 97

[[57]]
[1] 100

[[58]]
[1] 122

[[59]]
[1] 101

[[60]]
[1] 99

[[61]]
[1] 88

[[62]]
[1] 119

[[63]]
[1] 99

[[64]]
[1] 117

[[65]]
[1] 75

[[66]]
[1] 87

[[67]]
[1] 86

[[68]]
[1] 97

[[69]]
[1] 95

[[70]]
[1] 101

[[71]]
[1] 88

[[72]]
[1] 122

[[73]]
[1] 80

[[74]]
[1] 114

[[75]]
[1] 83

[[76]]
[1] 117

[[77]]
[1] 105

[[78]]
[1] 122

[[79]]
[1] 97

[[80]]
[1] 93

[[81]]
[1] 120

[[82]]
[1] 105

[[83]]
[1] 103

[[84]]
[1] 95

[[85]]
[1] 106

[[86]]
[1] 100

[[87]]
[1] 102

[[88]]
[1] 120

[[89]]
[1] 95

[[90]]
[1] 94

[[91]]
[1] 89

[[92]]
[1] 105

[[93]]
[1] 90

[[94]]
[1] 115

[[95]]
[1] 85

[[96]]
[1] 120

[[97]]
[1] 119

[[98]]
[1] 95

[[99]]
[1] 112

[[100]]
[1] 90

[[101]]
[1] 135

[[102]]
[1] 115

[[103]]
[1] 100

[[104]]
[1] 85

[[105]]
[1] 111

[[106]]
[1] 77

[[107]]
[1] 90

[[108]]
[1] 83

[[109]]
[1] 89

[[110]]
[1] 85

[[111]]
[1] 102

[[112]]
[1] 100

[[113]]
[1] 97

[[114]]
[1] 113

[[115]]
[1] 92

[[116]]
[1] 91

[[117]]
[1] 119

[[118]]
[1] 95

[[119]]
[1] 104

[[120]]
[1] 130

[[121]]
[1] 106

[[122]]
[1] 72

[[123]]
[1] 100

[[124]]
[1] 76

[[125]]
[1] 85

[[126]]
[1] 95

[[127]]
[1] 111

[[128]]
[1] 116

[[129]]
[1] 105

[[130]]
[1] 74

[[131]]
[1] 120

[[132]]
[1] 94

[[133]]
[1] 87

[[134]]
[1] 84

[[135]]
[1] 97

[[136]]
[1] 121

[[137]]
[1] 112

[[138]]
[1] 113

[[139]]
[1] 98

[[140]]
[1] 77

[[141]]
[1] 126

[[142]]
[1] 92

[[143]]
[1] 81

[[144]]
[1] 110

[[145]]
[1] 87

[[146]]
[1] 94

[[147]]
[1] 100

[[148]]
[1] 93

[[149]]
[1] 100

[[150]]
[1] 84

[[151]]
[1] 81

[[152]]
[1] 79

[[153]]
[1] 91

[[154]]
[1] 105

[[155]]
[1] 128

[[156]]
[1] 91

[[157]]
[1] 137

[[158]]
[1] 97

[[159]]
[1] 112

[[160]]
[1] 100

[[161]]
[1] 89

[[162]]
[1] 87

[[163]]
[1] 79

[[164]]
[1] 88

[[165]]
[1] 96

[[166]]
[1] 81

[[167]]
[1] 84

[[168]]
[1] 118

[[169]]
[1] 85

[[170]]
[1] 90

[[171]]
[1] 115

[[172]]
[1] 99

[[173]]
[1] 125

[[174]]
[1] 93

[[175]]
[1] 107

[[176]]
[1] 93

[[177]]
[1] 88

[[178]]
[1] 85

[[179]]
[1] 81

[[180]]
[1] 90

[[181]]
[1] 92

[[182]]
[1] 71

[[183]]
[1] 79

[[184]]
[1] 87

[[185]]
[1] 77

[[186]]
[1] 93

[[187]]
[1] 94

[[188]]
[1] 82

[[189]]
[1] 85

[[190]]
[1] 83

[[191]]
[1] 109

[[192]]
[1] 82

[[193]]
[1] 76

[[194]]
[1] 99

[[195]]
[1] 100

[[196]]
[1] 113

[[197]]
[1] 92

[[198]]
[1] 83

[[199]]
[1] 81

[[200]]
[1] 78

[[201]]
[1] 86

[[202]]
[1] 96

[[203]]
[1] 86

[[204]]
[1] 96

[[205]]
[1] 89

[[206]]
[1] 107

[[207]]
[1] 87

[[208]]
[1] 93

[[209]]
[1] 90

[[210]]
[1] 77

[[211]]
[1] 82

[[212]]
[1] 83

[[213]]
[1] 90

[[214]]
[1] 83

[[215]]
[1] 87

[[216]]
[1] 67

[[217]]
[1] 89

[[218]]
[1] 79

[[219]]
[1] 94

[[220]]
[1] 79

[[221]]
[1] 113

[[222]]
[1] 88

  [1] 35019 91026 19286 16253 22368 71699 44633  7127 42513 22657 14682 66029
 [13] 23631  6712 22333 18250 14304 37781  4735 14002 38400 27119 22665  8014
 [25] 24327  3162 11722 10847 35021 15720 16722  3918 16704  4261 12401 18535
 [37]  6629  4120 13652  5491 28355  6059  6426  2662 11252 10573 21182  5593
 [49]  7087  3459  3211 13501 22332  3371 10405  5288  3498 34177 16134  6739
 [61]  6090 11512  6864  4382  4199  6438  3313  6459 11806  4444 10563  5129
 [73] 20886  7030 17126  5258  3640  5818  7512  3026 10027  7797  3718  3387
 [85]  8139  3899  6491 12604 24370  2805  3249  9842  3204  3140 10096  3273
 [97]  9739  4350  2554  2627 13077  7337  4243  7076 16374  7219  4760  6177
[109]  6058  3641  3317  7524 18637  4025  8299 11647  3817  3791  5256  2844
[121]  4240  3636  3631  9552  7351  3921  4371  4027  9616  4935  5446 20640
[133] 11020 13359  7591  3431  2938  6234  8697  3000  6802  7213  3403  2792
[145]  4068  9203  3711  4947  6541  7475  6186  2577  4112  2602  3289  2998
[157]  2766  4276  7787  7220  2619  3727  3419  3829  4270  8949  7832  6963
[169]  2631  2712  2806  3020  2742  3707  4661  2819  8940  3468  3088  6746
[181]  3528  4381  5934  4601  4367  7352  2713  4955  4136  3247  4834  3037
[193]  3139  2561  7047  3433  2901  3152  3933  2737  4460  4554  3218  3259
[205]  3782  3903  3284  2971  2677  6253  6577  5136  2735  3582  5551  2710
[217]  4349  4090  2710  3026  3907  3498
Code
# Stack the two scraped result sets and give the columns readable names.
col_names <- c("Title", "Year", "Duration", "Rating", "Votes")
final_df <- bind_rows(first_url_df, second_url_df) |>
  setNames(col_names)

PART 3

a)

Code
# Sort all movies from highest to lowest rating.
order_rating <- arrange(final_df, desc(Rating))

# Five best-rated movies.
order_rating |>
  head(n = 5) |>
  print()
                         Title Year Duration Rating Votes
1               Hababam Sinifi 1975       87    9.2 42513
2       CM101MMXI Fundamentals 2013      139    9.1 46995
3                   Tosun Pasa 1976       90    8.9 24327
4 Hababam Sinifi Sinifta Kaldi 1975       95    8.9 24370
5                Süt Kardesler 1976       80    8.8 20886
Code
# Five worst-rated movies (last rows of the descending sort).
order_rating |>
  tail(n = 5) |>
  print()
                             Title Year Duration Rating Votes
466                 Cumali Ceber 2 2018      100    1.2 10228
467                          Müjde 2022       48    1.2  9920
468              15/07 Safak Vakti 2021       95    1.2 20607
469 Cumali Ceber: Allah Seni Alsin 2017      100    1.0 39267
470                           Reis 2017      108    1.0 73973

I wasn’t surprised by the placement of the top 5 movies, but I didn’t expect the last movie to rank that low.

b)

Code
# Keep only the rows whose title is one of my favourites, then show ratings.
my_fav_movies <- c("Recep Ivedik", "Recep Ivedik 2", "Recep Ivedik 3")
my_fav_movies_df <- filter(final_df, Title %in% my_fav_movies)
print(my_fav_movies_df[["Rating"]])
[1] 4.2 4.8 4.5
Code
# Row positions of my favourites in the rating-sorted table (1 = best rated).
which(order_rating[["Title"]] %in% my_fav_movies)
[1] 406 417 422

c)

Code
# Average rating per release year, one point per year.
final_df |>
  summarise(ratings_mean = mean(Rating), .by = Year) |>
  ggplot(aes(x = Year, y = ratings_mean)) +
  geom_point()

Code
# Number of movies per release year, one point per year.
# The counting is done by stat = "count", so no dplyr grouping is needed
# before handing the data to ggplot().
final_df |>
  ggplot(aes(x = Year)) +
  geom_point(stat = "count")

Code
# Distribution of ratings within each release year; the year is treated as a
# category so every year gets its own box, and the tick labels are rotated
# to keep them legible.
ggplot(final_df, aes(x = as.factor(Year), y = Rating)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90))

Unfortunately, the mean rating of movies coming out of Turkey has decreased over the years, while the number of movies released has increased. The box plot shows that not only has the average rating of movies from our country decreased, but the lowest ratings have also fallen over the years.

d)

Code
# Votes vs. rating, one point per movie. A scatter plot is used because the
# movies are independent observations: joining them with a line in order of
# vote count would draw a zig-zag that carries no meaning.
# Votes are heavily right-skewed, hence the log-scaled x axis.
final_df |>
  ggplot(aes(x = Votes, y = Rating)) +
  geom_point(color = "blue") +
  scale_x_log10()

Looking at this graph, I don’t see a correlation between Votes and Rating.

e)

Code
# Duration vs. rating, one point per movie. A scatter plot is used because
# the movies are independent observations: joining them with a line in order
# of duration would draw a zig-zag that carries no meaning.
final_df |>
  ggplot(aes(x = Duration, y = Rating)) +
  geom_point(color = "purple")

Looking at this graph, I don’t see a correlation between Duration and Rating.

PART 4

Code
# Download the IMDb "top 1000" search page restricted to Turkish productions.
data_html <- read_html("https://m.imdb.com/search/title/?groups=top_1000&country_of_origin=TR")

# Heading text is split once at the first space and only the second piece is
# kept (presumably dropping a leading rank such as "1." -- verify against the
# live page).
title_names <- html_text(html_nodes(data_html, 'a h3'))
title_names <- unlist(lapply(str_split(title_names, " ", n = 2), "[", 2))
title_names_df <- data.frame(title_names)

# First metadata item of each entry, used below as the release year.
years <- data_html |>
  html_nodes(".sc-43986a27-8.jHYIIK.dli-title-metadata-item:nth-child(1)") |>
  html_text()
print(years)
 [1] "2019" "2014" "2009" "2017" "2005" "2018" "2011" "1996" "2004" "2001"
[11] "1998"
Code
# Pair every scraped title with its (integer) year.
years_df <- data.frame(as.integer(years))
col_names <- c("Title", "Year")
new_df <- bind_cols(title_names_df, years_df) |>
  setNames(col_names)

# Look up duration, rating and votes for these movies in the full data set;
# matching on both title and year keeps same-named movies apart.
new_data_join <- inner_join(
  new_df,
  final_df,
  by = c("Title", "Year"),
  keep = FALSE
)
print(new_data_join)
                       Title Year Duration Rating Votes
1   Yedinci Kogustaki Mucize 2019      132    8.2 54156
2                 Kis Uykusu 2014      196    8.0 54633
3      Nefes: Vatan Sagolsun 2009      128    8.0 35019
4  Ayla: The Daughter of War 2017      125    8.3 42990
5             Babam ve Oglum 2005      108    8.2 91026
6                Ahlat Agaci 2018      188    8.0 27003
7    Bir Zamanlar Anadolu'da 2011      157    7.8 49354
8                     Eskiya 1996      128    8.1 71699
9                   G.O.R.A. 2004      127    8.0 66029
10                 Vizontele 2001      110    8.0 38400
11  Her Sey Çok Güzel Olacak 1998      107    8.1 27119
Code
# Eleven best-rated movies in the full data set.
final_df |>
  arrange(desc(Rating)) |>
  head(n = 11) |>
  print()
                          Title Year Duration Rating Votes
1                Hababam Sinifi 1975       87    9.2 42513
2        CM101MMXI Fundamentals 2013      139    9.1 46995
3                    Tosun Pasa 1976       90    8.9 24327
4  Hababam Sinifi Sinifta Kaldi 1975       95    8.9 24370
5                 Süt Kardesler 1976       80    8.8 20886
6              Saban Oglu Saban 1977       90    8.7 18535
7                    Zügürt Aga 1985      101    8.7 16134
8                 Neseli Günler 1978       95    8.7 11806
9                   Kibar Feyzo 1978       83    8.7 17126
10      Hababam Sinifi Uyaniyor 1976       94    8.7 20640
11               Canim Kardesim 1973       85    8.6 10096
Code
# The joined top-1000 movies, ordered by rating for comparison.
new_data_join |>
  arrange(desc(Rating)) |>
  head(n = 11) |>
  print()
                       Title Year Duration Rating Votes
1  Ayla: The Daughter of War 2017      125    8.3 42990
2   Yedinci Kogustaki Mucize 2019      132    8.2 54156
3             Babam ve Oglum 2005      108    8.2 91026
4                     Eskiya 1996      128    8.1 71699
5   Her Sey Çok Güzel Olacak 1998      107    8.1 27119
6                 Kis Uykusu 2014      196    8.0 54633
7      Nefes: Vatan Sagolsun 2009      128    8.0 35019
8                Ahlat Agaci 2018      188    8.0 27003
9                   G.O.R.A. 2004      127    8.0 66029
10                 Vizontele 2001      110    8.0 38400
11   Bir Zamanlar Anadolu'da 2011      157    7.8 49354

As the tables show, the two orderings are different. At first I thought the list was based on a minimum number of votes, but this does not seem to be the case. Looking at these tables, it is possible that IMDb runs an algorithm that takes the release year into consideration.

Back to top