add movie scraping and documentation
Some checks failed
Build docker container / build (push) Failing after 7m21s

This commit is contained in:
David Claeys
2024-05-17 10:17:15 +02:00
parent a82d7bd861
commit 87e8700236
8 changed files with 203 additions and 14 deletions

View File

@ -7,6 +7,8 @@ using System.Web;
using TableSpans.HtmlAgilityPack;
using TelebilbaoEpg.Database.Models;
using TelebilbaoEpg.Database.Repository;
using Telebilbap_Epg.Services;
using static System.Net.Mime.MediaTypeNames;
namespace TelebilbaoEpg.Jobs
{
@ -14,11 +16,13 @@ namespace TelebilbaoEpg.Jobs
{
private IConfiguration _configuration;
private IBroadCastRepository _broadCastRepository;
private IMovieService _movieService;
public ScrapeJob(IConfiguration configuration, IBroadCastRepository broadCastRepository)
public ScrapeJob(IConfiguration configuration, IBroadCastRepository broadCastRepository, IMovieService movieService)
{
_configuration = configuration;
_broadCastRepository = broadCastRepository;
_movieService = movieService;
}
private List<TimeBlock> GetTimeBlocks(HtmlNode programTable)
@ -510,8 +514,45 @@ namespace TelebilbaoEpg.Jobs
var startSaveDate = parsedBroadCasts.Min(x => x.From);
var endSaveDate = parsedBroadCasts.Max(x => x.To);
var movieIndicator = "Cine.";
var movies = parsedBroadCasts.Where(b => b.Name.Contains(movieIndicator))
.ToList();
foreach(var movie in movies)
{
string yearPattern = "(\\d{4})";
var textWithoutIndicator = movie.Name.Replace(movieIndicator, string.Empty).Trim();
var match = Regex.Match(textWithoutIndicator, yearPattern);
int? year = null;
if (match.Success)
{
year = int.Parse(match.Value);
}
var title = textWithoutIndicator;
if (year.HasValue)
{
var yearIndex = textWithoutIndicator.IndexOf(year.Value.ToString());
title = textWithoutIndicator.Substring(0, yearIndex).Replace(".", "").Trim();
}
var foundMovie = await _movieService.GetMovie(title, year);
if (foundMovie != null)
{
movie.Name = foundMovie.Title;
movie.Description = foundMovie.Description;
movie.ImageUrl = foundMovie.ImageUrl;
}
}
var savedBroadCasts = _broadCastRepository.GetBroadCasts(DateOnly.FromDateTime(startSaveDate), DateOnly.FromDateTime(endSaveDate));
foreach (var broadcast in parsedBroadCasts)
{
var shouldSave = !savedBroadCasts.Any(b => b.From == broadcast.From && b.To == broadcast.To && b.Name == broadcast.Name);

View File

@ -1,6 +1,7 @@
using Quartz;
using TelebilbaoEpg.Database.Repository;
using TelebilbaoEpg.Jobs;
using Telebilbap_Epg.Services;
var builder = WebApplication.CreateBuilder(args);
@ -11,6 +12,8 @@ builder.Services.AddControllers();
builder.Services.AddEndpointsApiExplorer();
builder.Services.AddSwaggerGen();
builder.Services.AddHttpClient();
builder.Services.AddQuartz();
builder.Services.AddQuartzHostedService(opt =>
{
@ -20,6 +23,7 @@ builder.Services.AddQuartzHostedService(opt =>
builder.Logging.ClearProviders();
builder.Logging.AddConsole();
builder.Services.AddScoped<IMovieService, MovieService>();
builder.Services.AddScoped<IBroadCastRepository, BroadCastRepository>();
var app = builder.Build();

View File

@ -0,0 +1,7 @@
namespace Telebilbap_Epg.Services
{
/// <summary>
/// Contract for looking up metadata about a movie from its title.
/// </summary>
public interface IMovieService
{
/// <summary>
/// Asynchronously looks up a movie by title, optionally narrowed by a year.
/// </summary>
/// <param name="title">Title of the movie to look up.</param>
/// <param name="year">Optional year used to narrow the lookup; <c>null</c> when unknown.</param>
/// <returns>The movie that was found, or <c>null</c> when there is no usable match.</returns>
Task<Movie?> GetMovie(string title, int? year);
}
}

View File

@ -0,0 +1,111 @@
using System.Text.Json.Serialization;
namespace Telebilbap_Epg.Services
{
/// <summary>
/// <see cref="IMovieService"/> implementation that queries the movie search endpoint
/// configured under the <c>MovieApi</c> configuration section.
/// </summary>
public class MovieService : IMovieService
{
    private readonly HttpClient _httpClient;
    private readonly IConfiguration _configuration;

    /// <summary>Creates the service.</summary>
    /// <param name="httpClient">Client used to send the search request.</param>
    /// <param name="configuration">
    /// Source of the <c>MovieApi:Url</c>, <c>MovieApi:ApiKey</c> and <c>MovieApi:ImageUrl</c> settings.
    /// </param>
    public MovieService(HttpClient httpClient, IConfiguration configuration)
    {
        _httpClient = httpClient;
        _configuration = configuration;
    }

    /// <summary>
    /// Searches the configured movie API (requesting Spanish-language results) for
    /// <paramref name="title"/>.
    /// </summary>
    /// <param name="title">
    /// Title to search for; also used to pick the exact match when several results come back.
    /// </param>
    /// <param name="year">Optional year, sent as the <c>year</c> query parameter when present.</param>
    /// <returns>
    /// The matching movie, or <c>null</c> when the title is blank, the request does not succeed,
    /// nothing is found, or several results are returned and none of them carries exactly this
    /// title (ignoring case).
    /// </returns>
    public async Task<Movie?> GetMovie(string title, int? year)
    {
        // A blank query cannot match anything useful; skip the round trip.
        if (string.IsNullOrWhiteSpace(title))
        {
            return null;
        }

        var apiUrl = _configuration.GetValue<string>("MovieApi:Url");
        var apiKey = _configuration.GetValue<string>("MovieApi:ApiKey");
        var imageUrl = _configuration.GetValue<string>("MovieApi:ImageUrl");

        // ParseQueryString(string.Empty) yields a collection whose ToString() URL-encodes the values.
        var queryString = System.Web.HttpUtility.ParseQueryString(string.Empty);
        queryString.Add("query", title);
        queryString.Add("language", "es");
        if (year.HasValue)
        {
            queryString.Add("year", year.Value.ToString(System.Globalization.CultureInfo.InvariantCulture));
        }
        queryString.Add("api_key", apiKey);

        var url = $"{apiUrl}/3/search/movie?{queryString}";

        using var response = await _httpClient.GetAsync(url);

        // An error response may not carry a JSON body at all; treat it as "no movie found"
        // instead of letting the deserializer throw and abort the caller.
        if (!response.IsSuccessStatusCode)
        {
            return null;
        }

        var results = await response.Content.ReadFromJsonAsync<ApiResults>();
        if (results is null || results.TotalResults <= 0 || results.Results is null)
        {
            return null;
        }

        var candidates = results.Results;

        // With several candidates only an exact (case-insensitive) title match is trusted;
        // a single candidate is accepted as-is.
        var match = candidates.Count > 1
            ? candidates.FirstOrDefault(r => string.Equals(r.Title, title, StringComparison.OrdinalIgnoreCase))
            : candidates.FirstOrDefault();
        if (match is null)
        {
            return null;
        }

        // TryParse copes with missing, empty or malformed dates without throwing.
        DateOnly? releaseDate = null;
        if (DateOnly.TryParse(
                match.ReleaseDate,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out var parsedDate))
        {
            releaseDate = parsedDate;
        }

        var posterUrl = string.IsNullOrEmpty(match.PosterPath)
            ? string.Empty
            : $"{imageUrl}/t/p/original{match.PosterPath}";

        return new Movie
        {
            ImageUrl = posterUrl,
            // The JSON may contain explicit nulls; keep the non-nullable contract of Movie.
            Title = match.Title ?? string.Empty,
            ReleaseDate = releaseDate,
            Description = match.Overview ?? string.Empty,
        };
    }
}
/// <summary>
/// Envelope of the movie search response: the reported total plus the returned entries.
/// </summary>
internal class ApiResults
{
    /// <summary>Value of the <c>total_results</c> field of the response.</summary>
    [JsonPropertyName("total_results")]
    public int TotalResults { get; set; }

    /// <summary>Entries contained in the response; starts out as an empty list.</summary>
    public List<ApiResult> Results { get; set; } = new();
}
/// <summary>
/// A single entry of the movie search response.
/// </summary>
internal class ApiResult
{
    /// <summary>Title of the movie; empty until populated.</summary>
    public string Title { get; set; } = "";

    /// <summary>Overview text of the movie; empty until populated.</summary>
    public string Overview { get; set; } = "";

    /// <summary>Value of the <c>poster_path</c> field; empty until populated.</summary>
    [JsonPropertyName("poster_path")]
    public string PosterPath { get; set; } = "";

    /// <summary>Value of the <c>release_date</c> field, kept as the raw string; empty until populated.</summary>
    [JsonPropertyName("release_date")]
    public string ReleaseDate { get; set; } = "";
}
/// <summary>
/// Movie metadata handed back to callers of the movie service.
/// </summary>
public class Movie
{
    /// <summary>Title of the movie; empty by default.</summary>
    public string Title { get; set; } = "";

    /// <summary>Descriptive text for the movie; empty by default.</summary>
    public string Description { get; set; } = "";

    /// <summary>Release date when known; <c>null</c> by default.</summary>
    public DateOnly? ReleaseDate { get; set; }

    /// <summary>URL of an image for the movie; empty by default.</summary>
    public string ImageUrl { get; set; } = "";
}
}

View File

@ -5,9 +5,14 @@
"Microsoft.AspNetCore": "Warning"
},
"Quartz": {
"JobSchedule": "0 0/1 * * * ?"
"JobSchedule": "$JOB_SCHEDULE"
},
"TableScrapeUrl": "https://www.telebilbao.es/programacion/",
"StationProgramInformationUrl": "https://www.telebilbao.es/"
"StationProgramInformationUrl": "https://www.telebilbao.es/",
"MovieApi": {
"Url": "$MOVIE_API_URL",
"ApiKey": "$MOVIE_API_KEY",
"ImageUrl": "$MOVIE_IMAGE_URL"
}
}
}

View File

@ -7,8 +7,13 @@
},
"AllowedHosts": "*",
"Quartz": {
"JobSchedule": "0 0/30 * * * ?"
"JobSchedule": "$JOB_SCHEDULE"
},
"TableScrapeUrl": "https://www.telebilbao.es/programacion/",
"StationProgramInformationUrl": "https://www.telebilbao.es/"
"StationProgramInformationUrl": "https://www.telebilbao.es/",
"MovieApi": {
"Url": "$MOVIE_API_URL",
"ApiKey": "$MOVIE_API_KEY",
"ImageUrl": "$MOVIE_IMAGE_URL"
}
}