add movie scraping and documentation
Some checks failed
Build docker container / build (push) Failing after 7m21s
Some checks failed
Build docker container / build (push) Failing after 7m21s
This commit is contained in:
parent
a82d7bd861
commit
87e8700236
12
Dockerfile
12
Dockerfile
@ -17,6 +17,8 @@ RUN apk update \
|
|||||||
&& apk upgrade --available \
|
&& apk upgrade --available \
|
||||||
&& apk add ca-certificates \
|
&& apk add ca-certificates \
|
||||||
&& apk add tzdata \
|
&& apk add tzdata \
|
||||||
|
&& apk add envsubst \
|
||||||
|
&& apk add bash \
|
||||||
&& mkdir /config && mkdir -p /usr/local/share/ca-certificates/
|
&& mkdir /config && mkdir -p /usr/local/share/ca-certificates/
|
||||||
COPY --from=build-env /App/out .
|
COPY --from=build-env /App/out .
|
||||||
COPY --from=build-env /config /config
|
COPY --from=build-env /config /config
|
||||||
@ -26,6 +28,10 @@ ENV ASPNETCORE_URLS=http://+:80;https://+:443
|
|||||||
ENV ASPNETCORE_Kestrel__Certificates__Default__Path=/usr/local/share/ca-certificates/aspnetapp.crt
|
ENV ASPNETCORE_Kestrel__Certificates__Default__Path=/usr/local/share/ca-certificates/aspnetapp.crt
|
||||||
ENV ASPNETCORE_Kestrel__Certificates__Default__KeyPath=/usr/local/share/ca-certificates/aspnetapp.key
|
ENV ASPNETCORE_Kestrel__Certificates__Default__KeyPath=/usr/local/share/ca-certificates/aspnetapp.key
|
||||||
ENV ASPNETCORE_Kestrel__Certificates__Default__Password=$CERT_PASSWORD
|
ENV ASPNETCORE_Kestrel__Certificates__Default__Password=$CERT_PASSWORD
|
||||||
|
ENV JOB_SCHEDULE="0 0/30 * * * ?"
|
||||||
|
ENV MOVIE_API_URL=https://api.themoviedb.org/
|
||||||
|
ENV MOVIE_API_KEY=""
|
||||||
|
ENV MOVIE_IMAGE_URL=https://image.tmdb.org
|
||||||
RUN chown -R app:app /App/* \
|
RUN chown -R app:app /App/* \
|
||||||
&& cp /config/aspnetapp.pem $ASPNETCORE_Kestrel__Certificates__Default__Path \
|
&& cp /config/aspnetapp.pem $ASPNETCORE_Kestrel__Certificates__Default__Path \
|
||||||
&& cp /config/aspnetapp.key $ASPNETCORE_Kestrel__Certificates__Default__KeyPath \
|
&& cp /config/aspnetapp.key $ASPNETCORE_Kestrel__Certificates__Default__KeyPath \
|
||||||
@ -41,8 +47,8 @@ RUN chown -R app:app /App/* \
|
|||||||
&& mkdir /data && chmod 755 /data \
|
&& mkdir /data && chmod 755 /data \
|
||||||
&& cat > /data/telebilbaoEpg.db \
|
&& cat > /data/telebilbaoEpg.db \
|
||||||
&& chmod 777 /data/telebilbaoEpg.db \
|
&& chmod 777 /data/telebilbaoEpg.db \
|
||||||
&& chown -R app:app /data/*
|
&& chown -R app:app /data/*
# At container start, substitute the MOVIE_* and JOB_SCHEDULE placeholders in
# appsettings.json from the environment, then exec the app so it replaces the
# shell and receives container stop signals directly.
# The RUN instruction above must NOT end with a line continuation, otherwise
# this ENTRYPOINT line is swallowed into the RUN command.
ENTRYPOINT echo "$(envsubst '${MOVIE_API_URL},${MOVIE_API_KEY},${MOVIE_IMAGE_URL},${JOB_SCHEDULE}' < appsettings.json)" > appsettings.json \
    && exec dotnet "TelebilbaoEpg.dll"
|
||||||
EXPOSE 80
|
EXPOSE 80
|
||||||
EXPOSE 443
|
EXPOSE 443
|
22
README.md
22
README.md
@ -22,10 +22,20 @@ Parameters :
|
|||||||
- from : start date to get the schedule
|
- from : start date to get the schedule
|
||||||
- to : end date to get the schedule
|
- to : end date to get the schedule
|
||||||
|
|
||||||
## Pending
|
## Movie API
|
||||||
|
|
||||||
|
For movies the page does not contain any metadata or poster.
|
||||||
|
In order to get this data [TMDB](https://developer.themoviedb.org/reference/intro/getting-started) is used.
|
||||||
|
In order to get your API key follow the steps on [this](https://developer.themoviedb.org/docs/getting-started) page.
|
||||||
|
|
||||||
|
## Docker image
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
| Variable | Description | Default |
|
||||||
|
|-------------------------------|------------------------------------------------------------------------|-------------------------------|
|
||||||
|
| JOB_SCHEDULE | Cron expression indicating the scraping recurrence | 0 0/30 * * * ? |
|
||||||
|
| MOVIE_API_URL | The url to the movie API | https://api.themoviedb.org/ |
|
||||||
|
| MOVIE_IMAGE_URL | The base url for images on the movie API | https://image.tmdb.org |
|
||||||
|
| MOVIE_API_KEY | The API key for the API | N/A |
|
||||||
|
|
||||||
Sometimes movies are emitted on this channel.
|
|
||||||
The titles are mentioned in Spanish together with the release year.
|
|
||||||
First the IMDB API was considered to acomplish this but it might not work with Spanish retro titles.
|
|
||||||
Furthermore the IMDB requires an API token which would make the scraper more difficult to use.
|
|
||||||
An alternative that supports Spanish and without API tokens needs to be found.
|
|
@ -7,6 +7,8 @@ using System.Web;
|
|||||||
using TableSpans.HtmlAgilityPack;
|
using TableSpans.HtmlAgilityPack;
|
||||||
using TelebilbaoEpg.Database.Models;
|
using TelebilbaoEpg.Database.Models;
|
||||||
using TelebilbaoEpg.Database.Repository;
|
using TelebilbaoEpg.Database.Repository;
|
||||||
|
using Telebilbap_Epg.Services;
|
||||||
|
using static System.Net.Mime.MediaTypeNames;
|
||||||
|
|
||||||
namespace TelebilbaoEpg.Jobs
|
namespace TelebilbaoEpg.Jobs
|
||||||
{
|
{
|
||||||
@ -14,11 +16,13 @@ namespace TelebilbaoEpg.Jobs
|
|||||||
{
|
{
|
||||||
private IConfiguration _configuration;
|
private IConfiguration _configuration;
|
||||||
private IBroadCastRepository _broadCastRepository;
|
private IBroadCastRepository _broadCastRepository;
|
||||||
|
private IMovieService _movieService;
|
||||||
|
|
||||||
public ScrapeJob(IConfiguration configuration, IBroadCastRepository broadCastRepository)
|
public ScrapeJob(IConfiguration configuration, IBroadCastRepository broadCastRepository, IMovieService movieService)
|
||||||
{
|
{
|
||||||
_configuration = configuration;
|
_configuration = configuration;
|
||||||
_broadCastRepository = broadCastRepository;
|
_broadCastRepository = broadCastRepository;
|
||||||
|
_movieService = movieService;
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<TimeBlock> GetTimeBlocks(HtmlNode programTable)
|
private List<TimeBlock> GetTimeBlocks(HtmlNode programTable)
|
||||||
@ -510,8 +514,45 @@ namespace TelebilbaoEpg.Jobs
|
|||||||
var startSaveDate = parsedBroadCasts.Min(x => x.From);
|
var startSaveDate = parsedBroadCasts.Min(x => x.From);
|
||||||
var endSaveDate = parsedBroadCasts.Max(x => x.To);
|
var endSaveDate = parsedBroadCasts.Max(x => x.To);
|
||||||
|
|
||||||
|
var movieIndicator = "Cine.";
|
||||||
|
var movies = parsedBroadCasts.Where(b => b.Name.Contains(movieIndicator))
|
||||||
|
.ToList();
|
||||||
|
|
||||||
|
foreach(var movie in movies)
|
||||||
|
{
|
||||||
|
string yearPattern = "(\\d{4})";
|
||||||
|
|
||||||
|
var textWithoutIndicator = movie.Name.Replace(movieIndicator, string.Empty).Trim();
|
||||||
|
|
||||||
|
var match = Regex.Match(textWithoutIndicator, yearPattern);
|
||||||
|
int? year = null;
|
||||||
|
|
||||||
|
if (match.Success)
|
||||||
|
{
|
||||||
|
year = int.Parse(match.Value);
|
||||||
|
}
|
||||||
|
|
||||||
|
var title = textWithoutIndicator;
|
||||||
|
|
||||||
|
if (year.HasValue)
|
||||||
|
{
|
||||||
|
var yearIndex = textWithoutIndicator.IndexOf(year.Value.ToString());
|
||||||
|
title = textWithoutIndicator.Substring(0, yearIndex).Replace(".", "").Trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
var foundMovie = await _movieService.GetMovie(title, year);
|
||||||
|
|
||||||
|
if (foundMovie != null)
|
||||||
|
{
|
||||||
|
movie.Name = foundMovie.Title;
|
||||||
|
movie.Description = foundMovie.Description;
|
||||||
|
movie.ImageUrl = foundMovie.ImageUrl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var savedBroadCasts = _broadCastRepository.GetBroadCasts(DateOnly.FromDateTime(startSaveDate), DateOnly.FromDateTime(endSaveDate));
|
var savedBroadCasts = _broadCastRepository.GetBroadCasts(DateOnly.FromDateTime(startSaveDate), DateOnly.FromDateTime(endSaveDate));
|
||||||
|
|
||||||
|
|
||||||
foreach (var broadcast in parsedBroadCasts)
|
foreach (var broadcast in parsedBroadCasts)
|
||||||
{
|
{
|
||||||
var shouldSave = !savedBroadCasts.Any(b => b.From == broadcast.From && b.To == broadcast.To && b.Name == broadcast.Name);
|
var shouldSave = !savedBroadCasts.Any(b => b.From == broadcast.From && b.To == broadcast.To && b.Name == broadcast.Name);
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
using Quartz;
|
using Quartz;
|
||||||
using TelebilbaoEpg.Database.Repository;
|
using TelebilbaoEpg.Database.Repository;
|
||||||
using TelebilbaoEpg.Jobs;
|
using TelebilbaoEpg.Jobs;
|
||||||
|
using Telebilbap_Epg.Services;
|
||||||
|
|
||||||
var builder = WebApplication.CreateBuilder(args);
|
var builder = WebApplication.CreateBuilder(args);
|
||||||
|
|
||||||
@ -11,6 +12,8 @@ builder.Services.AddControllers();
|
|||||||
builder.Services.AddEndpointsApiExplorer();
|
builder.Services.AddEndpointsApiExplorer();
|
||||||
builder.Services.AddSwaggerGen();
|
builder.Services.AddSwaggerGen();
|
||||||
|
|
||||||
|
builder.Services.AddHttpClient();
|
||||||
|
|
||||||
builder.Services.AddQuartz();
|
builder.Services.AddQuartz();
|
||||||
builder.Services.AddQuartzHostedService(opt =>
|
builder.Services.AddQuartzHostedService(opt =>
|
||||||
{
|
{
|
||||||
@ -20,6 +23,7 @@ builder.Services.AddQuartzHostedService(opt =>
|
|||||||
builder.Logging.ClearProviders();
|
builder.Logging.ClearProviders();
|
||||||
builder.Logging.AddConsole();
|
builder.Logging.AddConsole();
|
||||||
|
|
||||||
|
builder.Services.AddScoped<IMovieService, MovieService>();
|
||||||
builder.Services.AddScoped<IBroadCastRepository, BroadCastRepository>();
|
builder.Services.AddScoped<IBroadCastRepository, BroadCastRepository>();
|
||||||
|
|
||||||
var app = builder.Build();
|
var app = builder.Build();
|
||||||
|
7
TelebilbaoEpg/Services/IMovieService.cs
Normal file
7
TelebilbaoEpg/Services/IMovieService.cs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
namespace Telebilbap_Epg.Services
|
||||||
|
{
|
||||||
|
/// <summary>
/// Abstraction over the movie metadata lookup used while scraping.
/// </summary>
public interface IMovieService
{
    /// <summary>
    /// Searches for a movie by its title and, optionally, its release year.
    /// </summary>
    /// <param name="title">Title of the movie to look up.</param>
    /// <param name="year">Release year used to narrow the search, or <c>null</c> to search by title only.</param>
    /// <returns>The movie that was found, or <c>null</c> when there is no match.</returns>
    Task<Movie?> GetMovie(string title, int? year);
}
|
||||||
|
}
|
111
TelebilbaoEpg/Services/MovieService.cs
Normal file
111
TelebilbaoEpg/Services/MovieService.cs
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
using System.Text.Json.Serialization;
|
||||||
|
|
||||||
|
namespace Telebilbap_Epg.Services
|
||||||
|
{
|
||||||
|
/// <summary>
/// Looks up movie metadata (title, overview, poster and release date) through the
/// <c>/3/search/movie</c> endpoint of the movie API configured under <c>MovieApi</c>.
/// </summary>
public class MovieService : IMovieService
{
    private readonly HttpClient _httpClient;

    private readonly IConfiguration _configuration;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="httpClient">HTTP client used to call the movie API.</param>
    /// <param name="configuration">Configuration providing <c>MovieApi:Url</c>, <c>MovieApi:ApiKey</c> and <c>MovieApi:ImageUrl</c>.</param>
    public MovieService(HttpClient httpClient, IConfiguration configuration)
    {
        _httpClient = httpClient;
        _configuration = configuration;
    }

    /// <summary>
    /// Searches the movie API (in Spanish) for <paramref name="title"/>, optionally
    /// restricted to <paramref name="year"/>.
    /// </summary>
    /// <param name="title">Title to search for.</param>
    /// <param name="year">Release year filter, or <c>null</c> for no filter.</param>
    /// <returns>
    /// The matching movie, or <c>null</c> when the title is blank, the request is not
    /// successful, nothing is found, or several results come back and none of them has
    /// exactly the requested title.
    /// </returns>
    public async Task<Movie?> GetMovie(string title, int? year)
    {
        if (string.IsNullOrWhiteSpace(title))
        {
            return null;
        }

        var apiUrl = _configuration.GetValue<string>("MovieApi:Url");
        var apiKey = _configuration.GetValue<string>("MovieApi:ApiKey");
        var imageUrl = _configuration.GetValue<string>("MovieApi:ImageUrl");

        var queryString = System.Web.HttpUtility.ParseQueryString(string.Empty);
        queryString.Add("query", title);
        queryString.Add("language", "es");

        if (year.HasValue)
        {
            queryString.Add("year", year.Value.ToString(System.Globalization.CultureInfo.InvariantCulture));
        }

        queryString.Add("api_key", apiKey);

        // The configured base URL may end with a slash; trim it so the path does not start with "//".
        var url = $"{apiUrl?.TrimEnd('/')}/3/search/movie?{queryString}";

        using var requestResponse = await _httpClient.GetAsync(url);

        // Error responses are treated as "not found" instead of being parsed as a result list.
        if (!requestResponse.IsSuccessStatusCode)
        {
            return null;
        }

        var results = await requestResponse.Content.ReadFromJsonAsync<ApiResults>();
        var candidates = results?.Results;

        if (results is null || results.TotalResults <= 0 || candidates is null)
        {
            return null;
        }

        // With several candidates only an exact (case-insensitive) title match is trusted;
        // a single candidate is accepted as-is.
        var match = candidates.Count > 1
            ? candidates.FirstOrDefault(r => string.Equals(r.Title, title, StringComparison.OrdinalIgnoreCase))
            : candidates.FirstOrDefault();

        if (match is null)
        {
            return null;
        }

        // The release date may be missing or empty; TryParse leaves it null in that case.
        DateOnly? releaseDate = null;

        if (DateOnly.TryParse(
                match.ReleaseDate,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out var parsedDate))
        {
            releaseDate = parsedDate;
        }

        var posterPath = string.Empty;

        if (!string.IsNullOrEmpty(match.PosterPath))
        {
            posterPath = $"{imageUrl?.TrimEnd('/')}/t/p/original{match.PosterPath}";
        }

        return new Movie()
        {
            ImageUrl = posterPath,
            Title = match.Title ?? title,
            ReleaseDate = releaseDate,
            Description = match.Overview ?? string.Empty,
        };
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Envelope of a movie search response: the hit count plus the returned hits.
/// </summary>
internal class ApiResults
{
    /// <summary>Hits returned by the search; starts out as an empty list.</summary>
    public List<ApiResult> Results { get; set; } = new();

    /// <summary>Value of the <c>total_results</c> field of the response.</summary>
    [JsonPropertyName("total_results")]
    public int TotalResults { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One movie entry of a search response.
/// </summary>
internal class ApiResult
{
    /// <summary>Value of the <c>release_date</c> field, kept as raw text.</summary>
    [JsonPropertyName("release_date")]
    public string ReleaseDate { get; set; } = "";

    /// <summary>Value of the <c>poster_path</c> field.</summary>
    [JsonPropertyName("poster_path")]
    public string PosterPath { get; set; } = "";

    /// <summary>Overview text of the movie.</summary>
    public string Overview { get; set; } = "";

    /// <summary>Title of the movie.</summary>
    public string Title { get; set; } = "";
}
|
||||||
|
|
||||||
|
/// <summary>
/// Movie metadata handed back to callers of the movie service.
/// </summary>
public class Movie
{
    /// <summary>URL of the poster image; empty by default.</summary>
    public string ImageUrl { get; set; } = "";

    /// <summary>Release date, or <c>null</c> when none is set.</summary>
    public DateOnly? ReleaseDate { get; set; }

    /// <summary>Description of the movie; empty by default.</summary>
    public string Description { get; set; } = "";

    /// <summary>Title of the movie; empty by default.</summary>
    public string Title { get; set; } = "";
}
|
||||||
|
}
|
@ -5,9 +5,14 @@
|
|||||||
"Microsoft.AspNetCore": "Warning"
|
"Microsoft.AspNetCore": "Warning"
|
||||||
},
|
},
|
||||||
"Quartz": {
|
"Quartz": {
|
||||||
"JobSchedule": "0 0/1 * * * ?"
|
"JobSchedule": "$JOB_SCHEDULE"
|
||||||
},
|
},
|
||||||
"TableScrapeUrl": "https://www.telebilbao.es/programacion/",
|
"TableScrapeUrl": "https://www.telebilbao.es/programacion/",
|
||||||
"StationProgramInformationUrl": "https://www.telebilbao.es/"
|
"StationProgramInformationUrl": "https://www.telebilbao.es/",
|
||||||
|
"MovieApi": {
|
||||||
|
"Url": "$MOVIE_API_URL",
|
||||||
|
"ApiKey": "$MOVIE_API_KEY",
|
||||||
|
"ImageUrl": "$MOVIE_IMAGE_URL"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7,8 +7,13 @@
|
|||||||
},
|
},
|
||||||
"AllowedHosts": "*",
|
"AllowedHosts": "*",
|
||||||
"Quartz": {
|
"Quartz": {
|
||||||
"JobSchedule": "0 0/30 * * * ?"
|
"JobSchedule": "$JOB_SCHEDULE"
|
||||||
},
|
},
|
||||||
"TableScrapeUrl": "https://www.telebilbao.es/programacion/",
|
"TableScrapeUrl": "https://www.telebilbao.es/programacion/",
|
||||||
"StationProgramInformationUrl": "https://www.telebilbao.es/"
|
"StationProgramInformationUrl": "https://www.telebilbao.es/",
|
||||||
|
"MovieApi": {
|
||||||
|
"Url": "$MOVIE_API_URL",
|
||||||
|
"ApiKey": "$MOVIE_API_KEY",
|
||||||
|
"ImageUrl": "$MOVIE_IMAGE_URL"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user