diff --git a/Dockerfile b/Dockerfile
index 155f8f3..5fafe57 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,6 +17,8 @@ RUN apk update \
     && apk upgrade --available \
     && apk add ca-certificates \
     && apk add tzdata \
+    && apk add envsubst \
+    && apk add bash \
     && mkdir /config && mkdir -p /usr/local/share/ca-certificates/
 COPY --from=build-env /App/out .
 COPY --from=build-env /config /config
@@ -26,6 +28,10 @@ ENV ASPNETCORE_URLS=http://+:80;https://+:443
 ENV ASPNETCORE_Kestrel__Certificates__Default__Path=/usr/local/share/ca-certificates/aspnetapp.crt
 ENV ASPNETCORE_Kestrel__Certificates__Default__KeyPath=/usr/local/share/ca-certificates/aspnetapp.key
 ENV ASPNETCORE_Kestrel__Certificates__Default__Password=$CERT_PASSWORD
+ENV JOB_SCHEDULE="0 0/30 * * * ?"
+ENV MOVIE_API_URL=https://api.themoviedb.org/
+ENV MOVIE_API_KEY=""
+ENV MOVIE_IMAGE_URL=https://image.tmdb.org
 RUN chown -R app:app /App/* \
     && cp /config/aspnetapp.pem $ASPNETCORE_Kestrel__Certificates__Default__Path \
     && cp /config/aspnetapp.key $ASPNETCORE_Kestrel__Certificates__Default__KeyPath \
@@ -43,6 +49,8 @@ RUN chown -R app:app /App/* \
     && chmod 777 /data/telebilbaoEpg.db \
     && chown -R app:app /data/*
 
-ENTRYPOINT ["dotnet", "TelebilbaoEpg.dll"]
+# Render the $VAR placeholders in appsettings.json from the environment, then replace the shell with the app.
+ENTRYPOINT echo "$(envsubst '${MOVIE_API_URL},${MOVIE_API_KEY},${MOVIE_IMAGE_URL},${JOB_SCHEDULE}' < appsettings.json)" > appsettings.json \
+    && exec dotnet "TelebilbaoEpg.dll"
 EXPOSE 80
 EXPOSE 443
\ No newline at end of file
diff --git a/README.md b/README.md
index 41ec7de..c638682 100644
--- a/README.md
+++ b/README.md
@@ -22,10 +22,20 @@ Parameters :
 - from : start date to get the schedule
 - to : end date to get the schedule
 
-## Pending
+## Movie API
+
+For movies the page does not contain any metadata or poster.
+In order to get this data [TMDB](https://developer.themoviedb.org/reference/intro/getting-started) is used.
+In order to get your API key, follow the steps on [this](https://developer.themoviedb.org/docs/getting-started) page.
+
+## Docker image
+
+### Environment Variables
+
+| Variable                      | Description                                                            | Default                       |
+|-------------------------------|------------------------------------------------------------------------|-------------------------------|
+| JOB_SCHEDULE                  | Cron expression indicating the scraping recurrence                     | 0 0/30 * * * ?                |
+| MOVIE_API_URL                 | The URL of the movie API                                               | https://api.themoviedb.org/   |
+| MOVIE_IMAGE_URL               | The base URL for images on the movie API                               | https://image.tmdb.org        |
+| MOVIE_API_KEY                 | The API key for the movie API                                          | N/A                           |
 
-Sometimes movies are emitted on this channel.
-The titles are mentioned in Spanish together with the release year.
-First the IMDB API was considered to acomplish this but it might not work with Spanish retro titles.
-Furthermore the IMDB requires an API token which would make the scraper more difficult to use.
-An alternative that supports Spanish and without API tokens needs to be found.
\ No newline at end of file
diff --git a/TelebilbaoEpg/Jobs/ScrapeJob.cs b/TelebilbaoEpg/Jobs/ScrapeJob.cs
index 6844c59..3a8f4a8 100644
--- a/TelebilbaoEpg/Jobs/ScrapeJob.cs
+++ b/TelebilbaoEpg/Jobs/ScrapeJob.cs
@@ -7,6 +7,8 @@ using System.Web;
 using TableSpans.HtmlAgilityPack;
 using TelebilbaoEpg.Database.Models;
 using TelebilbaoEpg.Database.Repository;
+using Telebilbap_Epg.Services;
+using static System.Net.Mime.MediaTypeNames;
 
 namespace TelebilbaoEpg.Jobs
 {
@@ -14,11 +16,13 @@ namespace TelebilbaoEpg.Jobs
     {
         private IConfiguration _configuration;
         private IBroadCastRepository _broadCastRepository;
+        private IMovieService _movieService;
 
-        public ScrapeJob(IConfiguration configuration, IBroadCastRepository broadCastRepository)
+        public ScrapeJob(IConfiguration configuration, IBroadCastRepository broadCastRepository, IMovieService movieService)
         {
             _configuration = configuration;
             _broadCastRepository = broadCastRepository;
+            _movieService = movieService;
         }
 
         private List GetTimeBlocks(HtmlNode programTable)
@@ -510,8 +514,46 @@ namespace TelebilbaoEpg.Jobs
 
             var startSaveDate = parsedBroadCasts.Min(x => x.From);
             var endSaveDate = parsedBroadCasts.Max(x => x.To);
+            var movieIndicator = "Cine.";
+            var movies = parsedBroadCasts.Where(b => b.Name.Contains(movieIndicator))
+                                         .ToList();
+
+            foreach (var movie in movies)
+            {
+                var textWithoutIndicator = movie.Name.Replace(movieIndicator, string.Empty).Trim();
+
+                // The title is the text before the release year. Search right-to-left for a
+                // stand-alone four-digit number so digits inside the title ("Blade Runner 2049")
+                // are not mistaken for the year.
+                var match = Regex.Match(textWithoutIndicator, "(?<![0-9])[0-9]{4}(?![0-9])", RegexOptions.RightToLeft);
+                int? year = null;
+                var title = textWithoutIndicator;
+
+                if (match.Success)
+                {
+                    var candidateTitle = textWithoutIndicator.Substring(0, match.Index).Replace(".", "").Trim();
+
+                    // A name that is only a number (e.g. "1917") has no text before it; keep it as the title.
+                    if (candidateTitle.Length > 0)
+                    {
+                        year = int.Parse(match.Value);
+                        title = candidateTitle;
+                    }
+                }
+
+                var foundMovie = await _movieService.GetMovie(title, year);
+
+                if (foundMovie != null)
+                {
+                    movie.Name = foundMovie.Title;
+                    movie.Description = foundMovie.Description;
+                    movie.ImageUrl = foundMovie.ImageUrl;
+                }
+            }
+
             var savedBroadCasts = _broadCastRepository.GetBroadCasts(DateOnly.FromDateTime(startSaveDate), DateOnly.FromDateTime(endSaveDate));
 
+
             foreach (var broadcast in parsedBroadCasts)
             {
                 var shouldSave = !savedBroadCasts.Any(b => b.From == broadcast.From && b.To == broadcast.To && b.Name == broadcast.Name);
diff --git a/TelebilbaoEpg/Program.cs b/TelebilbaoEpg/Program.cs
index bd6b9f8..7de7d78 100644
--- a/TelebilbaoEpg/Program.cs
+++ b/TelebilbaoEpg/Program.cs
@@ -1,6 +1,7 @@
 using Quartz;
 using TelebilbaoEpg.Database.Repository;
 using TelebilbaoEpg.Jobs;
+using Telebilbap_Epg.Services;
 
 var builder = WebApplication.CreateBuilder(args);
 
@@ -11,6 +12,8 @@ builder.Services.AddControllers();
 builder.Services.AddEndpointsApiExplorer();
 builder.Services.AddSwaggerGen();
 
+builder.Services.AddHttpClient();
+
 builder.Services.AddQuartz();
 builder.Services.AddQuartzHostedService(opt =>
 {
@@ -20,6 +23,7 @@ builder.Services.AddQuartzHostedService(opt =>
 builder.Logging.ClearProviders();
 builder.Logging.AddConsole();
 
+builder.Services.AddScoped<IMovieService, MovieService>();
 builder.Services.AddScoped();
 
 var app = builder.Build();
diff --git a/TelebilbaoEpg/Services/IMovieService.cs b/TelebilbaoEpg/Services/IMovieService.cs
new file mode 100644
index 0000000..a8bf632
--- /dev/null
+++ b/TelebilbaoEpg/Services/IMovieService.cs
@@ -0,0 +1,16 @@
+namespace Telebilbap_Epg.Services
+{
+    /// <summary>
+    /// Looks up metadata for a movie in an external movie database.
+    /// </summary>
+    public interface IMovieService
+    {
+        /// <summary>
+        /// Searches for a movie by title and, optionally, release year.
+        /// </summary>
+        /// <param name="title">Title of the movie to search for.</param>
+        /// <param name="year">Release year used to narrow the search, or <c>null</c>.</param>
+        /// <returns>The matching movie, or <c>null</c> when none was found.</returns>
+        Task<Movie?> GetMovie(string title, int? year);
+    }
+}
diff --git a/TelebilbaoEpg/Services/MovieService.cs b/TelebilbaoEpg/Services/MovieService.cs
new file mode 100644
index 0000000..40e6a3d
--- /dev/null
+++ b/TelebilbaoEpg/Services/MovieService.cs
@@ -0,0 +1,141 @@
+using System.Globalization;
+using System.Text.Json.Serialization;
+
+namespace Telebilbap_Epg.Services
+{
+    /// <summary>
+    /// <see cref="IMovieService"/> implementation that queries a TMDB-style search API over HTTP.
+    /// </summary>
+    public class MovieService : IMovieService
+    {
+        private readonly HttpClient _httpClient;
+
+        private readonly IConfiguration _configuration;
+
+        public MovieService(HttpClient httpClient, IConfiguration configuration)
+        {
+            _httpClient = httpClient;
+            _configuration = configuration;
+        }
+
+        /// <summary>
+        /// Searches the movie API for <paramref name="title"/> (Spanish results) and maps the selected hit.
+        /// </summary>
+        /// <param name="title">Title to search for.</param>
+        /// <param name="year">Optional release year used to narrow the search.</param>
+        /// <returns>
+        /// The matching movie, or <c>null</c> when the title or API settings are blank,
+        /// the request fails, or no suitable result is returned.
+        /// </returns>
+        public async Task<Movie?> GetMovie(string title, int? year)
+        {
+            var apiUrl = _configuration.GetValue<string>("MovieApi:Url");
+            var apiKey = _configuration.GetValue<string>("MovieApi:ApiKey");
+            var imageUrl = _configuration.GetValue<string>("MovieApi:ImageUrl");
+
+            // Without a title, base URL or API key the search cannot succeed, so skip the request.
+            if (string.IsNullOrWhiteSpace(title) || string.IsNullOrWhiteSpace(apiUrl) || string.IsNullOrWhiteSpace(apiKey))
+            {
+                return null;
+            }
+
+            var queryString = System.Web.HttpUtility.ParseQueryString(string.Empty);
+
+            queryString.Add("query", title);
+            queryString.Add("language", "es");
+
+            if (year.HasValue)
+            {
+                queryString.Add("year", year.Value.ToString(CultureInfo.InvariantCulture));
+            }
+
+            queryString.Add("api_key", apiKey);
+
+            // The configured base URL may end with '/'; trim it to avoid "//" in the request path.
+            var url = $"{apiUrl.TrimEnd('/')}/3/search/movie?{queryString}";
+
+            using var requestResponse = await _httpClient.GetAsync(url);
+
+            // Error responses carry no search results; treat them as "not found".
+            if (!requestResponse.IsSuccessStatusCode)
+            {
+                return null;
+            }
+
+            var results = await requestResponse.Content.ReadFromJsonAsync<ApiResults>();
+
+            if (results?.Results == null || results.Results.Count == 0)
+            {
+                return null;
+            }
+
+            // With several hits only an exact, case-insensitive title match is accepted.
+            var firstResult = results.Results.Count > 1
+                ? results.Results.FirstOrDefault(r => string.Equals(r.Title, title, StringComparison.OrdinalIgnoreCase))
+                : results.Results[0];
+
+            if (firstResult == null)
+            {
+                return null;
+            }
+
+            // release_date may be empty or missing, so parse it without throwing.
+            DateOnly? releaseDate = null;
+
+            if (DateOnly.TryParse(firstResult.ReleaseDate, CultureInfo.InvariantCulture, DateTimeStyles.None, out var parsedDate))
+            {
+                releaseDate = parsedDate;
+            }
+
+            var posterPath = string.Empty;
+
+            if (!string.IsNullOrEmpty(firstResult.PosterPath) && !string.IsNullOrWhiteSpace(imageUrl))
+            {
+                posterPath = $"{imageUrl.TrimEnd('/')}/t/p/original{firstResult.PosterPath}";
+            }
+
+            return new Movie
+            {
+                ImageUrl = posterPath,
+                Title = firstResult.Title ?? string.Empty,
+                ReleaseDate = releaseDate,
+                Description = firstResult.Overview ?? string.Empty,
+            };
+        }
+    }
+
+    /// <summary>Envelope of the movie search response.</summary>
+    internal class ApiResults
+    {
+        [JsonPropertyName("total_results")]
+        public int TotalResults { get; set; }
+
+        public List<ApiResult> Results { get; set; } = new List<ApiResult>();
+    }
+
+    /// <summary>Single entry of the movie search response.</summary>
+    internal class ApiResult
+    {
+        public string Title { get; set; } = string.Empty;
+
+        public string Overview { get; set; } = string.Empty;
+
+        [JsonPropertyName("poster_path")]
+        public string PosterPath { get; set; } = string.Empty;
+
+        [JsonPropertyName("release_date")]
+        public string ReleaseDate { get; set; } = string.Empty;
+    }
+
+    /// <summary>Movie metadata returned by <see cref="IMovieService"/>.</summary>
+    public class Movie
+    {
+        public string Title { get; set; } = string.Empty;
+
+        public string Description { get; set; } = string.Empty;
+
+        public DateOnly? ReleaseDate { get; set; } = null;
+
+        public string ImageUrl { get; set; } = string.Empty;
+    }
+}
diff --git a/TelebilbaoEpg/appsettings.Development.json b/TelebilbaoEpg/appsettings.Development.json
index fa63cfd..35479ff 100644
--- a/TelebilbaoEpg/appsettings.Development.json
+++ b/TelebilbaoEpg/appsettings.Development.json
@@ -5,9 +5,14 @@
       "Microsoft.AspNetCore": "Warning"
     },
     "Quartz": {
-      "JobSchedule": "0 0/1 * * * ?"
+ "JobSchedule": "$JOB_SCHEDULE" }, "TableScrapeUrl": "https://www.telebilbao.es/programacion/", - "StationProgramInformationUrl": "https://www.telebilbao.es/" + "StationProgramInformationUrl": "https://www.telebilbao.es/", + "MovieApi": { + "Url": "$MOVIE_API_URL", + "ApiKey": "$MOVIE_API_KEY", + "ImageUrl": "$MOVIE_IMAGE_URL" + } } } diff --git a/TelebilbaoEpg/appsettings.json b/TelebilbaoEpg/appsettings.json index 0eb250d..1ebccbd 100644 --- a/TelebilbaoEpg/appsettings.json +++ b/TelebilbaoEpg/appsettings.json @@ -7,8 +7,13 @@ }, "AllowedHosts": "*", "Quartz": { - "JobSchedule": "0 0/30 * * * ?" + "JobSchedule": "$JOB_SCHEDULE" }, "TableScrapeUrl": "https://www.telebilbao.es/programacion/", - "StationProgramInformationUrl": "https://www.telebilbao.es/" + "StationProgramInformationUrl": "https://www.telebilbao.es/", + "MovieApi": { + "Url": "$MOVIE_API_URL", + "ApiKey": "$MOVIE_API_KEY", + "ImageUrl": "$MOVIE_IMAGE_URL" + } }