add movie scraping and documentation
Some checks failed
Build docker container / build (push) Failing after 7m21s
Some checks failed
Build docker container / build (push) Failing after 7m21s
This commit is contained in:
parent
a82d7bd861
commit
87e8700236
12
Dockerfile
12
Dockerfile
@ -17,6 +17,8 @@ RUN apk update \
|
||||
&& apk upgrade --available \
|
||||
&& apk add ca-certificates \
|
||||
&& apk add tzdata \
|
||||
&& apk add envsubst \
|
||||
&& apk add bash \
|
||||
&& mkdir /config && mkdir -p /usr/local/share/ca-certificates/
|
||||
COPY --from=build-env /App/out .
|
||||
COPY --from=build-env /config /config
|
||||
@ -26,6 +28,10 @@ ENV ASPNETCORE_URLS=http://+:80;https://+:443
|
||||
ENV ASPNETCORE_Kestrel__Certificates__Default__Path=/usr/local/share/ca-certificates/aspnetapp.crt
|
||||
ENV ASPNETCORE_Kestrel__Certificates__Default__KeyPath=/usr/local/share/ca-certificates/aspnetapp.key
|
||||
ENV ASPNETCORE_Kestrel__Certificates__Default__Password=$CERT_PASSWORD
|
||||
ENV JOB_SCHEDULE="0 0/30 * * * ?"
|
||||
ENV MOVIE_API_URL=https://api.themoviedb.org/
|
||||
ENV MOVIE_API_KEY=""
|
||||
ENV MOVIE_IMAGE_URL=https://image.tmdb.org
|
||||
RUN chown -R app:app /App/* \
|
||||
&& cp /config/aspnetapp.pem $ASPNETCORE_Kestrel__Certificates__Default__Path \
|
||||
&& cp /config/aspnetapp.key $ASPNETCORE_Kestrel__Certificates__Default__KeyPath \
|
||||
@ -41,8 +47,8 @@ RUN chown -R app:app /App/* \
|
||||
&& mkdir /data && chmod 755 /data \
|
||||
&& cat > /data/telebilbaoEpg.db \
|
||||
&& chmod 777 /data/telebilbaoEpg.db \
|
||||
&& chown -R app:app /data/*
|
||||
|
||||
ENTRYPOINT ["dotnet", "TelebilbaoEpg.dll"]
|
||||
&& chown -R app:app /data/*

# Fill the $VARIABLE placeholders in appsettings.json from the container
# environment at start-up, then launch the application. Only the variables
# listed in the envsubst SHELL-FORMAT argument are substituted.
ENTRYPOINT echo "$(envsubst '${MOVIE_API_URL},${MOVIE_API_KEY},${MOVIE_IMAGE_URL},${JOB_SCHEDULE}' < appsettings.json)" > appsettings.json \
&& dotnet "TelebilbaoEpg.dll"
|
||||
EXPOSE 80
|
||||
EXPOSE 443
|
22
README.md
22
README.md
@ -22,10 +22,20 @@ Parameters :
|
||||
- from : start date to get the schedule
|
||||
- to : end date to get the schedule
|
||||
|
||||
## Pending
|
||||
## Movie API
|
||||
|
||||
For movies, the page does not contain any metadata or poster.
|
||||
In order to get this data [TMDB](https://developer.themoviedb.org/reference/intro/getting-started) is used.
|
||||
In order to get your API key follow the steps on [this](https://developer.themoviedb.org/docs/getting-started) page.
|
||||
|
||||
## Docker image
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Description | Default |
|
||||
|-------------------------------|------------------------------------------------------------------------|-------------------------------|
|
||||
| JOB_SCHEDULE | Cron expression indicating the scraping recurrence | 0 0/30 * * * ? |
|
||||
| MOVIE_API_URL | The url to the movie API | https://api.themoviedb.org/ |
|
||||
| MOVIE_IMAGE_URL | The base url for images on the movie API | https://image.tmdb.org |
|
||||
| MOVIE_API_KEY | The API key for the API | N/A |
|
||||
|
||||
Sometimes movies are emitted on this channel.
|
||||
The titles are mentioned in Spanish together with the release year.
|
||||
First, the IMDB API was considered to accomplish this, but it might not work with Spanish retro titles.
|
||||
Furthermore the IMDB requires an API token which would make the scraper more difficult to use.
|
||||
An alternative that supports Spanish and without API tokens needs to be found.
|
@ -7,6 +7,8 @@ using System.Web;
|
||||
using TableSpans.HtmlAgilityPack;
|
||||
using TelebilbaoEpg.Database.Models;
|
||||
using TelebilbaoEpg.Database.Repository;
|
||||
using Telebilbap_Epg.Services;
|
||||
using static System.Net.Mime.MediaTypeNames;
|
||||
|
||||
namespace TelebilbaoEpg.Jobs
|
||||
{
|
||||
@ -14,11 +16,13 @@ namespace TelebilbaoEpg.Jobs
|
||||
{
|
||||
private IConfiguration _configuration;
|
||||
private IBroadCastRepository _broadCastRepository;
|
||||
private IMovieService _movieService;
|
||||
|
||||
public ScrapeJob(IConfiguration configuration, IBroadCastRepository broadCastRepository)
|
||||
/// <summary>
/// Creates the job and stores the services it was given.
/// </summary>
/// <param name="configuration">Application configuration.</param>
/// <param name="broadCastRepository">Repository used to read and store broadcasts.</param>
/// <param name="movieService">Service used to look up movie details.</param>
public ScrapeJob(
    IConfiguration configuration,
    IBroadCastRepository broadCastRepository,
    IMovieService movieService)
{
    // Single tuple assignment; equivalent to assigning the three fields one by one.
    (_configuration, _broadCastRepository, _movieService) =
        (configuration, broadCastRepository, movieService);
}
|
||||
|
||||
private List<TimeBlock> GetTimeBlocks(HtmlNode programTable)
|
||||
@ -510,8 +514,45 @@ namespace TelebilbaoEpg.Jobs
|
||||
var startSaveDate = parsedBroadCasts.Min(x => x.From);
|
||||
var endSaveDate = parsedBroadCasts.Max(x => x.To);
|
||||
|
||||
var movieIndicator = "Cine.";
|
||||
var movies = parsedBroadCasts.Where(b => b.Name.Contains(movieIndicator))
|
||||
.ToList();
|
||||
|
||||
foreach(var movie in movies)
|
||||
{
|
||||
string yearPattern = "(\\d{4})";
|
||||
|
||||
var textWithoutIndicator = movie.Name.Replace(movieIndicator, string.Empty).Trim();
|
||||
|
||||
var match = Regex.Match(textWithoutIndicator, yearPattern);
|
||||
int? year = null;
|
||||
|
||||
if (match.Success)
|
||||
{
|
||||
year = int.Parse(match.Value);
|
||||
}
|
||||
|
||||
var title = textWithoutIndicator;
|
||||
|
||||
if (year.HasValue)
|
||||
{
|
||||
var yearIndex = textWithoutIndicator.IndexOf(year.Value.ToString());
|
||||
title = textWithoutIndicator.Substring(0, yearIndex).Replace(".", "").Trim();
|
||||
}
|
||||
|
||||
var foundMovie = await _movieService.GetMovie(title, year);
|
||||
|
||||
if (foundMovie != null)
|
||||
{
|
||||
movie.Name = foundMovie.Title;
|
||||
movie.Description = foundMovie.Description;
|
||||
movie.ImageUrl = foundMovie.ImageUrl;
|
||||
}
|
||||
}
|
||||
|
||||
var savedBroadCasts = _broadCastRepository.GetBroadCasts(DateOnly.FromDateTime(startSaveDate), DateOnly.FromDateTime(endSaveDate));
|
||||
|
||||
|
||||
foreach (var broadcast in parsedBroadCasts)
|
||||
{
|
||||
var shouldSave = !savedBroadCasts.Any(b => b.From == broadcast.From && b.To == broadcast.To && b.Name == broadcast.Name);
|
||||
|
@ -1,6 +1,7 @@
|
||||
using Quartz;
|
||||
using TelebilbaoEpg.Database.Repository;
|
||||
using TelebilbaoEpg.Jobs;
|
||||
using Telebilbap_Epg.Services;
|
||||
|
||||
var builder = WebApplication.CreateBuilder(args);
|
||||
|
||||
@ -11,6 +12,8 @@ builder.Services.AddControllers();
|
||||
builder.Services.AddEndpointsApiExplorer();
|
||||
builder.Services.AddSwaggerGen();
|
||||
|
||||
builder.Services.AddHttpClient();
|
||||
|
||||
builder.Services.AddQuartz();
|
||||
builder.Services.AddQuartzHostedService(opt =>
|
||||
{
|
||||
@ -20,6 +23,7 @@ builder.Services.AddQuartzHostedService(opt =>
|
||||
builder.Logging.ClearProviders();
|
||||
builder.Logging.AddConsole();
|
||||
|
||||
builder.Services.AddScoped<IMovieService, MovieService>();
|
||||
builder.Services.AddScoped<IBroadCastRepository, BroadCastRepository>();
|
||||
|
||||
var app = builder.Build();
|
||||
|
7
TelebilbaoEpg/Services/IMovieService.cs
Normal file
7
TelebilbaoEpg/Services/IMovieService.cs
Normal file
@ -0,0 +1,7 @@
|
||||
namespace Telebilbap_Epg.Services
|
||||
{
|
||||
/// <summary>
/// Looks up movie details by title.
/// </summary>
public interface IMovieService
|
||||
{
|
||||
/// <summary>
/// Asynchronously looks up a movie by its title and, optionally, its release year.
/// </summary>
/// <param name="title">Title of the movie to look for.</param>
/// <param name="year">Release year of the movie, or <c>null</c> when it is not known.</param>
/// <returns>
/// A task producing the matching <see cref="Movie"/>, or <c>null</c> when no movie is returned.
/// </returns>
Task<Movie?> GetMovie(string title, int? year);
|
||||
}
|
||||
}
|
111
TelebilbaoEpg/Services/MovieService.cs
Normal file
111
TelebilbaoEpg/Services/MovieService.cs
Normal file
@ -0,0 +1,111 @@
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Telebilbap_Epg.Services
|
||||
{
|
||||
/// <summary>
/// <see cref="IMovieService"/> implementation that queries the <c>/3/search/movie</c>
/// endpoint of the movie API configured under the <c>MovieApi</c> configuration section.
/// </summary>
public class MovieService : IMovieService
{
    // Date format used when parsing the "release_date" field.
    // NOTE(review): assumes the API returns ISO dates (yyyy-MM-dd) - confirm against the API docs.
    private const string ReleaseDateFormat = "yyyy-MM-dd";

    private readonly HttpClient _httpClient;

    private readonly IConfiguration _configuration;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="httpClient">HTTP client used to call the movie API.</param>
    /// <param name="configuration">
    /// Configuration providing <c>MovieApi:Url</c>, <c>MovieApi:ApiKey</c> and <c>MovieApi:ImageUrl</c>.
    /// </param>
    public MovieService(HttpClient httpClient, IConfiguration configuration)
    {
        _httpClient = httpClient;
        _configuration = configuration;
    }

    /// <summary>
    /// Searches the movie API for <paramref name="title"/> (Spanish language results),
    /// optionally restricted to a release year.
    /// </summary>
    /// <param name="title">Title to search for.</param>
    /// <param name="year">Release year to filter on, or <c>null</c> for no year filter.</param>
    /// <returns>
    /// The matching movie, or <c>null</c> when the title is blank, the API answers with a
    /// non-success status code, nothing is found, or several results are returned and none
    /// of them has exactly the requested title (ignoring case).
    /// </returns>
    public async Task<Movie?> GetMovie(string title, int? year)
    {
        if (string.IsNullOrWhiteSpace(title))
        {
            return null;
        }

        // The configured base URLs may end with '/', so trim it to avoid
        // producing "//3/search/movie" or "//t/p/original".
        var apiUrl = _configuration.GetValue<string>("MovieApi:Url")?.TrimEnd('/');
        var apiKey = _configuration.GetValue<string>("MovieApi:ApiKey");
        var imageUrl = _configuration.GetValue<string>("MovieApi:ImageUrl")?.TrimEnd('/');

        var queryString = System.Web.HttpUtility.ParseQueryString(string.Empty);

        queryString.Add("query", title);
        queryString.Add("language", "es");

        if (year.HasValue)
        {
            // Invariant culture keeps the digits independent of the host locale.
            queryString.Add("year", year.Value.ToString(System.Globalization.CultureInfo.InvariantCulture));
        }

        queryString.Add("api_key", apiKey);

        var url = $"{apiUrl}/3/search/movie?{queryString}";

        using var requestResponse = await _httpClient.GetAsync(url);

        if (!requestResponse.IsSuccessStatusCode)
        {
            // Error responses are not guaranteed to carry a parsable search result.
            return null;
        }

        var results = await requestResponse.Content.ReadFromJsonAsync<ApiResults>();

        if (results is null || results.TotalResults <= 0 || results.Results is null)
        {
            return null;
        }

        var selected = SelectResult(results.Results, title);

        if (selected is null)
        {
            return null;
        }

        var posterPath = string.IsNullOrEmpty(selected.PosterPath)
            ? string.Empty
            : $"{imageUrl}/t/p/original{selected.PosterPath}";

        return new Movie
        {
            ImageUrl = posterPath,
            Title = selected.Title,
            ReleaseDate = ParseReleaseDate(selected.ReleaseDate),
            Description = selected.Overview,
        };
    }

    // Picks the only result, or - when there are several - the first one whose title equals the requested title ignoring case.
    private static ApiResult? SelectResult(List<ApiResult> candidates, string title)
    {
        if (candidates.Count > 1)
        {
            return candidates.FirstOrDefault(
                r => string.Equals(r.Title, title, StringComparison.OrdinalIgnoreCase));
        }

        return candidates.FirstOrDefault();
    }

    // Converts the API release date to a DateOnly; null, empty or malformed values yield null.
    private static DateOnly? ParseReleaseDate(string? value)
    {
        var parsed = DateOnly.TryParseExact(
            value,
            ReleaseDateFormat,
            System.Globalization.CultureInfo.InvariantCulture,
            System.Globalization.DateTimeStyles.None,
            out var date);

        return parsed ? date : null;
    }
}
|
||||
|
||||
/// <summary>
/// Shape of the JSON document read from the movie search response.
/// </summary>
internal class ApiResults
{
    /// <summary>Value of the <c>total_results</c> JSON property.</summary>
    [JsonPropertyName("total_results")]
    public int TotalResults { get; set; }

    /// <summary>Result entries of the response; an empty list until populated.</summary>
    public List<ApiResult> Results { get; set; } = new();
}
|
||||
|
||||
/// <summary>
/// A single entry of the movie search response.
/// </summary>
internal class ApiResult
{
    /// <summary>Title of the movie; empty by default.</summary>
    public string Title { get; set; } = "";

    /// <summary>Overview text of the movie; empty by default.</summary>
    public string Overview { get; set; } = "";

    /// <summary>Value of the <c>poster_path</c> JSON property; empty by default.</summary>
    [JsonPropertyName("poster_path")]
    public string PosterPath { get; set; } = "";

    /// <summary>Value of the <c>release_date</c> JSON property, kept as raw text; empty by default.</summary>
    [JsonPropertyName("release_date")]
    public string ReleaseDate { get; set; } = "";
}
|
||||
|
||||
/// <summary>
/// Movie details returned by <c>GetMovie</c>.
/// </summary>
public class Movie
{
    /// <summary>Title of the movie; empty by default.</summary>
    public string Title { get; set; } = "";

    /// <summary>Description of the movie; empty by default.</summary>
    public string Description { get; set; } = "";

    /// <summary>Release date of the movie; <c>null</c> by default.</summary>
    public DateOnly? ReleaseDate { get; set; }

    /// <summary>URL of the movie image; empty by default.</summary>
    public string ImageUrl { get; set; } = "";
}
|
||||
}
|
@ -5,9 +5,14 @@
|
||||
"Microsoft.AspNetCore": "Warning"
|
||||
},
|
||||
"Quartz": {
|
||||
"JobSchedule": "0 0/1 * * * ?"
|
||||
"JobSchedule": "$JOB_SCHEDULE"
|
||||
},
|
||||
"TableScrapeUrl": "https://www.telebilbao.es/programacion/",
|
||||
"StationProgramInformationUrl": "https://www.telebilbao.es/"
|
||||
"StationProgramInformationUrl": "https://www.telebilbao.es/",
|
||||
"MovieApi": {
|
||||
"Url": "$MOVIE_API_URL",
|
||||
"ApiKey": "$MOVIE_API_KEY",
|
||||
"ImageUrl": "$MOVIE_IMAGE_URL"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -7,8 +7,13 @@
|
||||
},
|
||||
"AllowedHosts": "*",
|
||||
"Quartz": {
|
||||
"JobSchedule": "0 0/30 * * * ?"
|
||||
"JobSchedule": "$JOB_SCHEDULE"
|
||||
},
|
||||
"TableScrapeUrl": "https://www.telebilbao.es/programacion/",
|
||||
"StationProgramInformationUrl": "https://www.telebilbao.es/"
|
||||
"StationProgramInformationUrl": "https://www.telebilbao.es/",
|
||||
"MovieApi": {
|
||||
"Url": "$MOVIE_API_URL",
|
||||
"ApiKey": "$MOVIE_API_KEY",
|
||||
"ImageUrl": "$MOVIE_IMAGE_URL"
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user