YouTubeで検索して視聴回数が多い動画のurlを取得します。
今回もSeleniumを使います。NuGetでSelenium.WebDriverとSelenium.Support、そして操作したいブラウザのDriverをインストールします。今回はChromeを操作するのでSelenium.Chrome.WebDriverをインストールします。取得したHTMLの解析にはAngleSharpを使い、結果はExcelファイルとして保存するので、AngleSharpとClosedXMLもインストールしておきましょう。
それから、検索ワードのURLエンコードにSystem.Web.HttpUtility.UrlEncodeを使うので、参照にSystem.Web.dllを追加しておいてください。
取得するのは、動画タイトル、動画url、チャンネル名、チャンネルurl、再生数、動画がアップされた日です。
再生回数順で検索した場合、検索結果のurlは以下のようになります。{0}の部分にはURLエンコードした検索ワードが入ります。
https://www.youtube.com/results?search_query={0}&sp=CAMSAhAB
また、検索結果のページのHTMLを調べてみると以下のような構造になっています。ここから必要な情報を取得することができます。
<div class="text-wrapper style-scope ytd-video-renderer">
    <div id="meta" class="style-scope ytd-video-renderer">
      <div id="title-wrapper" class="style-scope ytd-video-renderer">
        <h3 class="title-and-badge style-scope ytd-video-renderer">
          // 省略
          <a id="video-title" href="/watch?v=XXXX" class="yt-simple-endpoint style-scope ytd-video-renderer" title="">
            // 省略
            <yt-formatted-string class="style-scope ytd-video-renderer" aria-label="">
              【動画タイトル】
            </yt-formatted-string>
          </a>
        </h3>
        // 省略
      </div>
      <ytd-video-meta-block class="style-scope ytd-video-renderer byline-separated">
        <div id="metadata" class="style-scope ytd-video-meta-block">
          <div id="byline-container" class="style-scope ytd-video-meta-block" hidden="">
            <ytd-channel-name id="channel-name" class="style-scope ytd-video-meta-block">
              <div id="container" class="style-scope ytd-channel-name">
                <div id="text-container" class="style-scope ytd-channel-name">
                  <yt-formatted-string id="text" title="" class="style-scope ytd-channel-name complex-string" ellipsis-truncate="" has-link-only_="">
                    <a class="yt-simple-endpoint style-scope yt-formatted-string" spellcheck="false" href="/user/XXXX" dir="auto">
                    【チャンネル名】
                    </a>
                  </yt-formatted-string>
                </div>
                // 省略
              </div>
              <ytd-badge-supported-renderer class="style-scope ytd-channel-name" disable-upgrade="" hidden="">
              </ytd-badge-supported-renderer>
            </ytd-channel-name>
            <div id="separator" class="style-scope ytd-video-meta-block">•</div>
          </div>
          <div id="metadata-line" class="style-scope ytd-video-meta-block">
            <span class="style-scope ytd-video-meta-block">376万 回視聴</span>
            <span class="style-scope ytd-video-meta-block">2 年前</span>
            // 省略
          </div>
        </div>
        // 省略
      </ytd-video-meta-block>
    </div>
    <div id="channel-info" class="style-scope ytd-video-renderer">
      // 省略
    </div>
    // 省略
  </div>
そこで以下の方法でYouTubeの検索結果から動画情報を抜き出すことができます。
using OpenQA.Selenium.Chrome;
using AngleSharp.Html.Parser;

public partial class Form1 : Form
{
    /// <summary>
    /// Minimal demonstration: opens the "sorted by view count" search page for the
    /// keyword typed into textBox1 and walks the result entries, extracting the
    /// video title/url and the channel name/url from each one.
    /// </summary>
    /// <param name="sender">The button that raised the event.</param>
    /// <param name="e">Unused event data.</param>
    private void button1_Click(object sender, EventArgs e)
    {
        string keyword = textBox1.Text;
        if (keyword == "")
        {
            MessageBox.Show("検索ワードを入力してください");
            return;
        }

        string str = System.Web.HttpUtility.UrlEncode(keyword);
        string searchUrl = String.Format("https://www.youtube.com/results?search_query={0}&sp=CAMSAhAB", str);

        // Only the page source is needed, so the browser is shut down as soon as
        // it has been read. try/finally guarantees that the browser is closed
        // even when navigation fails.
        string source;
        ChromeDriver driver = new ChromeDriver();
        try
        {
            driver.Url = searchUrl;
            source = driver.PageSource;
        }
        finally
        {
            driver.Quit();
            driver.Dispose();
        }

        HtmlParser parser = new HtmlParser();
        var doc = parser.ParseDocument(source);

        var elms = doc.GetElementsByClassName("text-wrapper style-scope ytd-video-renderer");
        foreach (var elm in elms)
        {
            var links = elm.GetElementsByTagName("a");
            foreach (var link in links)
            {
                // The hrefs in the result page are site-relative ("/watch?v=..."),
                // so the leading slash is dropped before appending to the base url;
                // otherwise the result contains "youtube.com//watch".
                string href = (link.GetAttribute("href") ?? "").TrimStart('/');

                if (link.GetAttribute("id") == "video-title")
                {
                    string url = "https://www.youtube.com/" + href;
                    string title = link.TextContent.Trim();
                    // The video title and url are available here.
                }
                if (link.GetAttribute("class") == "yt-simple-endpoint style-scope yt-formatted-string")
                {
                    string channelUrl = "https://www.youtube.com/" + href;
                    string channelName = link.TextContent.Trim();
                    // The channel name and url are available here.
                }
            }
        }
    }
}
あとは自動的にスクロールさせて次々に必要なデータを取得していくだけです。
取得したデータを格納するクラスを作成します。動画のタイトルとurl、チャンネル名とチャンネルurl、視聴回数、アップロードされた時期を格納します。
/// <summary>
/// One row of the search result: a video, the channel that published it, and the
/// view count / upload time exactly as they were scraped (kept as text).
/// </summary>
public class Data
{
    /// <summary>
    /// Creates a row from the scraped values. A null argument is stored as an
    /// empty string so that every member is always safe to read and compare.
    /// </summary>
    /// <param name="videoTitle">Title of the video.</param>
    /// <param name="videoUrl">Url of the video.</param>
    /// <param name="channelName">Name of the channel.</param>
    /// <param name="channelUrl">Url of the channel.</param>
    /// <param name="count">View count text.</param>
    /// <param name="uploadTime">Upload time text.</param>
    public Data(string videoTitle, string videoUrl, string channelName, string channelUrl, string count, string uploadTime)
    {
        VideoTitle = videoTitle ?? "";
        VideoUrl = videoUrl ?? "";
        ChannelName = channelName ?? "";
        ChannelUrl = channelUrl ?? "";
        Count = count ?? "";
        UploadTime = uploadTime ?? "";
    }

    /// <summary>Title of the video.</summary>
    public string VideoTitle { get; set; }

    /// <summary>Url of the video.</summary>
    public string VideoUrl { get; set; }

    /// <summary>Name of the channel.</summary>
    public string ChannelName { get; set; }

    /// <summary>Url of the channel.</summary>
    public string ChannelUrl { get; set; }

    /// <summary>View count text.</summary>
    public string Count { get; set; }

    /// <summary>Upload time text.</summary>
    public string UploadTime { get; set; }
}
Form1クラスのコンストラクタを示します。numericUpDown1は最大取得数を設定するためのものです。コンストラクタ内でChromeDriverを生成します。
using OpenQA.Selenium.Chrome;

public partial class Form1 : Form
{
    // Browser session used by the form; assigned by CreateChromeDriver.
    ChromeDriver driver = null;

    /// <summary>
    /// Builds the form, starts the Chrome session and sets up numericUpDown1,
    /// the control holding the maximum number of videos to fetch
    /// (range 1 to 1000, initial value 10).
    /// </summary>
    public Form1()
    {
        InitializeComponent();
        CreateChromeDriver();

        numericUpDown1.Minimum = 1;
        numericUpDown1.Maximum = 1000;
        numericUpDown1.Value = 10;
    }

    /// <summary>
    /// Starts Chrome with the "--headless" argument and stores the session in
    /// <c>driver</c>. The driver service is asked to hide its command prompt window.
    /// </summary>
    void CreateChromeDriver()
    {
        var service = ChromeDriverService.CreateDefaultService();
        service.HideCommandPromptWindow = true;

        var chromeOptions = new ChromeOptions();
        chromeOptions.AddArgument("--headless");

        driver = new ChromeDriver(service, chromeOptions);
    }
}
検索ボタンが押されたら自作メソッドGetVideoDatasを呼び出して検索結果を取得します。
public partial class Form1 : Form
{
    /// <summary>
    /// Search button handler: asks where to save the .xlsx file, collects up to
    /// numericUpDown1.Value videos for the keyword in textBox1 on a worker thread,
    /// writes them to the chosen file and reports completion.
    /// </summary>
    /// <param name="sender">The button that raised the event.</param>
    /// <param name="e">Unused event data.</param>
    private async void button1_Click(object sender, EventArgs e)
    {
        string keyword = textBox1.Text;
        if (keyword == "")
        {
            MessageBox.Show("検索ワードを入力してください");
            return;
        }

        string filePath = "";
        using (SaveFileDialog dialog = new SaveFileDialog())
        {
            dialog.Filter = "Excelファイル(*.xlsx)|*.xlsx";
            if (dialog.ShowDialog() == DialogResult.OK)
            {
                filePath = dialog.FileName;
            }
        }

        if (filePath == "")
        {
            return;
        }

        // Controls must only be touched from the UI thread, so the limit is read
        // here rather than inside the worker.
        int max = (int)numericUpDown1.Value;

        button1.Enabled = false;
        try
        {
            // The UI thread stays free while awaiting, which lets the Invoke calls
            // made by GetVideoDatas run.
            await Task.Run(() =>
            {
                List<Data> datas = GetVideoDatas(keyword, max);
                SaveExcel(filePath, datas);
            });

            MessageBox.Show("完了しました");
        }
        catch (Exception ex)
        {
            // Without this the failure would vanish inside the task and the user
            // would get no feedback at all.
            MessageBox.Show(ex.Message);
        }
        finally
        {
            // Always give the button back, even when the search or the save failed.
            button1.Enabled = true;
        }
    }
}
検索結果が読み込まれたらHTMLを解析して動画に関する情報を取得します。
using AngleSharp.Html.Parser;

public partial class Form1 : Form
{
    /// <summary>
    /// Loads the "sorted by view count" search page for <paramref name="searchWord"/>
    /// and keeps scrolling and re-parsing it until <paramref name="max"/> distinct
    /// videos have been collected or a pass yields no new video.
    /// Progress is reported through progressBar1 (videos collected) and
    /// progressBar2 (wait before each parse). Uses Invoke for every UI update, so
    /// it is written to be called from a thread other than the UI thread.
    /// </summary>
    /// <param name="searchWord">Raw search keyword; it is url-encoded here.</param>
    /// <param name="max">Maximum number of videos to return.</param>
    /// <returns>The collected videos in the order they were found.</returns>
    List<Data> GetVideoDatas(string searchWord, int max)
    {
        Invoke((Action)(() => {
            progressBar1.Maximum = max;
            progressBar1.Value = 0;
        }));

        List<Data> datas = new List<Data>();
        // Urls already collected. The page is re-parsed from the top after every
        // scroll, so earlier entries show up again and must be skipped.
        HashSet<string> knownUrls = new HashSet<string>();

        string str = System.Web.HttpUtility.UrlEncode(searchWord);
        string searchUrl = String.Format("https://www.youtube.com/results?search_query={0}&sp=CAMSAhAB", str);
        driver.Url = searchUrl;

        HtmlParser parser = new HtmlParser();
        while (true)
        {
            WaitForResultsToLoad();

            string source = driver.PageSource;
            var doc = parser.ParseDocument(source);

            var elms = doc.GetElementsByClassName("text-wrapper style-scope ytd-video-renderer");
            int addedThisPass = 0;
            foreach (var elm in elms)
            {
                string videoTitle = "";
                string videoUrl = "";
                string channelName = "";
                string channelUrl = "";
                string count = "";
                string uploadTime = "";

                foreach (var link in elm.GetElementsByTagName("a"))
                {
                    if (link.GetAttribute("id") == "video-title")
                    {
                        videoUrl = ToAbsoluteUrl(link.GetAttribute("href"));
                        videoTitle = link.TextContent.Trim();
                    }
                    if (link.GetAttribute("class") == "yt-simple-endpoint style-scope yt-formatted-string")
                    {
                        channelUrl = ToAbsoluteUrl(link.GetAttribute("href"));
                        channelName = link.TextContent.Trim();
                    }
                }

                var metadataLine = elm.GetElementsByClassName("style-scope ytd-video-meta-block")
                                      .FirstOrDefault(x => x.GetAttribute("id") == "metadata-line");
                if (metadataLine != null)
                {
                    // First entry: view count, second entry: upload time. Either
                    // one may be absent, so the entries are counted instead of
                    // indexed blindly.
                    var entries = metadataLine.GetElementsByClassName("style-scope ytd-video-meta-block").ToList();
                    if (entries.Count > 0)
                    {
                        count = NormalizeViewCount(entries[0].TextContent);
                    }
                    if (entries.Count > 1)
                    {
                        uploadTime = entries[1].TextContent;
                    }
                }

                // Add returns false when the url has been collected before.
                if (knownUrls.Add(videoUrl))
                {
                    datas.Add(new Data(videoTitle, videoUrl, channelName, channelUrl, count, uploadTime));
                    addedThisPass++;
                    Invoke((Action)(() => {
                        progressBar1.Value++;
                    }));

                    if (datas.Count >= max)
                    {
                        break;
                    }
                }
            }

            if (addedThisPass == 0)
            {
                // Nothing new appeared after scrolling: the result list is exhausted.
                // NOTE(review): the message text is empty in the original; confirm
                // what should be shown to the user here.
                MessageBox.Show("");
                return datas;
            }

            if (datas.Count >= max)
            {
                return datas;
            }

            // Scroll by the largest Y position of any link on the page so that the
            // next batch of results gets loaded.
            int y = driver.FindElementsByTagName("a").Max(x => x.Location.Y);
            string script = string.Format("window.scrollBy(0, {0})", y);
            driver.ExecuteScript(script);
        }
    }

    /// <summary>
    /// Pauses for 10 x 500 ms before the page is parsed, advancing progressBar2
    /// once per step.
    /// </summary>
    void WaitForResultsToLoad()
    {
        Invoke((Action)(() => {
            progressBar2.Maximum = 10;
            progressBar2.Value = 0;
        }));

        for (int i = 0; i < 10; i++)
        {
            System.Threading.Thread.Sleep(500);
            Invoke((Action)(() => {
                progressBar2.Value++;
            }));
        }
    }

    /// <summary>
    /// Turns a site-relative href such as "/watch?v=XXXX" into an absolute url
    /// without doubling the slash after the host name. A missing href yields "".
    /// </summary>
    static string ToAbsoluteUrl(string href)
    {
        if (string.IsNullOrEmpty(href))
        {
            return "";
        }

        return "https://www.youtube.com/" + href.TrimStart('/');
    }

    /// <summary>
    /// Strips the "回視聴" suffix, commas and spaces from the view count text.
    /// </summary>
    static string NormalizeViewCount(string text)
    {
        return text.Replace("回視聴", "").Replace(",", "").Replace(" ", "");
    }
}
データが取得できたらexcelファイルとして保存します。
using ClosedXML.Excel;

public partial class Form1 : Form
{
    /// <summary>
    /// Writes the collected videos to a new workbook with a single sheet and saves
    /// it to <paramref name="excelPath"/>. Row 1 holds the column captions; each
    /// video takes one row starting at row 2, in columns A to F.
    /// </summary>
    /// <param name="excelPath">Path of the .xlsx file to write.</param>
    /// <param name="datas">Videos to write, one per row.</param>
    void SaveExcel(string excelPath, List<Data> datas)
    {
        string[] columns = { "A", "B", "C", "D", "E", "F" };
        string[] captions = { "動画タイトル", "動画Url", "チャンネル名", "チャンネルUrl", "再生数", "アップロード時" };

        using (var workbook = new XLWorkbook())
        {
            var sheet = workbook.Worksheets.Add("シート1");

            for (int c = 0; c < columns.Length; c++)
            {
                sheet.Cell(1, columns[c]).Value = captions[c];
            }

            for (int index = 0; index < datas.Count; index++)
            {
                Data item = datas[index];
                string[] values =
                {
                    item.VideoTitle,
                    item.VideoUrl,
                    item.ChannelName,
                    item.ChannelUrl,
                    item.Count,
                    item.UploadTime,
                };

                // Row 1 is the caption row, so the data starts at row 2.
                int row = index + 2;
                for (int c = 0; c < columns.Length; c++)
                {
                    sheet.Cell(row, columns[c]).Value = values[c];
                }
            }

            workbook.SaveAs(excelPath);
        }
    }
}
アプリケーションが終了するときは忘れずにChromeDriverをDisposeしておきます。
public partial class Form1 : Form
{
    /// <summary>
    /// Hides the form, shuts down the Chrome session and then runs the base
    /// class's close handling.
    /// </summary>
    /// <param name="e">Event data passed on to the base class.</param>
    protected override void OnClosed(EventArgs e)
    {
        this.Visible = false;

        try
        {
            // driver stays null when the session could not be created.
            if (driver != null)
            {
                driver.Quit();
                driver.Dispose();
            }
        }
        finally
        {
            // The base handler must run even when shutting the browser down fails.
            base.OnClosed(e);
        }
    }
}
