initial commit
All checks were successful
Build docker container / build (push) Successful in 5m4s

This commit is contained in:
David Claeys 2024-05-10 16:06:42 +02:00
parent e127afdb26
commit 8d198d46e0
18 changed files with 1022 additions and 1 deletions

30
.dockerignore Normal file
View File

@ -0,0 +1,30 @@
# Paths excluded from the Docker build context.
# IDE/editor state, VCS metadata, build output and container tooling files.
**/.classpath
**/.dockerignore
**/.env
**/.git
**/.gitignore
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user
**/*.dbmdl
**/*.jfm
**/azds.yaml
**/bin
**/charts
**/docker-compose*
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/obj
**/secrets.dev.yaml
**/values.dev.yaml
LICENSE
README.md
# Exceptions: these entries are re-included even though the patterns above
# exclude .gitignore files and the .git directory.
!**/.gitignore
!.git/HEAD
!.git/config
!.git/packed-refs
!.git/refs/heads/**

View File

@ -0,0 +1,35 @@
# Builds the container image on every push and publishes it to the
# git.claeyscloud.com package registry.
name: 'Build docker container'
on: [push]
jobs:
build:
runs-on: ubuntu-latest
defaults:
run:
# NOTE(review): GITHUB_WORKSPACE is referenced here as an expression context
# name rather than as github.workspace — confirm the runner resolves it.
working-directory: ${{ GITHUB_WORKSPACE }}
steps:
- name: Checkout
uses: actions/checkout@v3
# Installs Docker through the get.docker.com script only when the `docker`
# command is not already available on the runner.
- name: Install Docker
run: |
echo "Checking docker installation"
if command -v docker &> /dev/null; then
echo "Docker installation found"
else
echo "Docker installation not found. Docker will be installed"
curl -fsSL https://get.docker.com | sh
fi
- name: Set up Docker Buildx
uses: https://github.com/docker/setup-buildx-action@v3
# Authenticates against the registry with the PACKAGE_TOKEN secret.
- name: Docker login
uses: https://github.com/docker/login-action@v3
with:
registry: git.claeyscloud.com
username: nologin
password: ${{ secrets.PACKAGE_TOKEN }}
# Builds from the repository root and pushes the image; no explicit tag
# suffix is given in the image reference below.
- name: Build and push
uses: https://github.com/docker/build-push-action@v5
with:
context: .
push: true
tags: |
git.claeyscloud.com/david/telebilbao-epg

1
.gitignore vendored
View File

@ -398,3 +398,4 @@ FodyWeavers.xsd
# JetBrains Rider # JetBrains Rider
*.sln.iml *.sln.iml
/TelebilbaoEpg/telebilbaoEpg.db

48
Dockerfile Normal file
View File

@ -0,0 +1,48 @@
# Two-stage build: the SDK stage publishes the app and exports an HTTPS
# development certificate, the ASP.NET runtime stage installs both.
# NOTE(review): the certificate password has a literal default committed to the
# repository and is also copied into ENV in both stages — consider supplying it
# with --build-arg or a build secret instead.
ARG CERT_PASSWORD_ARG=3vo3rmb5DBJXsryjMfJsrpjbKsbj8B
FROM mcr.microsoft.com/dotnet/sdk:8.0-alpine-amd64 as build-env
ARG CERT_PASSWORD_ARG
ENV CERT_PASSWORD=$CERT_PASSWORD_ARG
WORKDIR /App
COPY . ./
# Restore, publish the web project to /App/out (framework-dependent, no app
# host), export the dev certificate as PEM into /config, then delete the
# development settings and debug symbols.
# NOTE(review): the `**` globs are expanded by the image's default shell —
# confirm they reach the files inside /App/out and not only one directory level.
RUN dotnet restore \
&& dotnet publish TelebilbaoEpg/TelebilbaoEpg.csproj --no-restore --self-contained false -c Release -o out /p:UseAppHost=false \
&& dotnet dev-certs https --export-path /config/aspnetapp.pem --password "$CERT_PASSWORD" --format PEM \
&& rm **/appsettings.Development.json && rm **/*.pdb
FROM mcr.microsoft.com/dotnet/aspnet:8.0-alpine-amd64
ARG CERT_PASSWORD_ARG
ENV CERT_PASSWORD=$CERT_PASSWORD_ARG
WORKDIR /App
# Upgrade the base packages, add CA certificate and time zone data, and create
# the directories used by the certificate installation below.
RUN apk update \
&& apk upgrade --available \
&& apk add ca-certificates \
&& apk add tzdata \
&& mkdir /config && mkdir -p /usr/local/share/ca-certificates/
COPY --from=build-env /App/out .
COPY --from=build-env /config /config
ENV DOTNET_CLI_TELEMETRY_OPTOUT=1
ENV ASPNETCORE_ENVIRONMENT=Production
ENV ASPNETCORE_URLS=http://+:80;https://+:443
# Kestrel default certificate: path, private key path and key password.
ENV ASPNETCORE_Kestrel__Certificates__Default__Path=/usr/local/share/ca-certificates/aspnetapp.crt
ENV ASPNETCORE_Kestrel__Certificates__Default__KeyPath=/usr/local/share/ca-certificates/aspnetapp.key
ENV ASPNETCORE_Kestrel__Certificates__Default__Password=$CERT_PASSWORD
# Hand the app files to the `app` user, install the certificate and key at the
# Kestrel paths, append the certificate to the system CA bundle, recreate /tmp
# owned by `app`, drop /config and the apk cache, and create the database file
# under /data.
# NOTE(review): /config/aspnetapp.key is never created explicitly in this file —
# presumably dev-certs writes it next to the .pem; confirm.
# NOTE(review): the private key ends up with mode 755 (world-readable).
# NOTE(review): `cat > /data/telebilbaoEpg.db` writes stdin to the file —
# presumably stdin is empty during the build so this yields an empty file; confirm.
RUN chown -R app:app /App/* \
&& cp /config/aspnetapp.pem $ASPNETCORE_Kestrel__Certificates__Default__Path \
&& cp /config/aspnetapp.key $ASPNETCORE_Kestrel__Certificates__Default__KeyPath \
&& chmod 755 $ASPNETCORE_Kestrel__Certificates__Default__Path && chmod +x $ASPNETCORE_Kestrel__Certificates__Default__Path \
&& chown app:app $ASPNETCORE_Kestrel__Certificates__Default__Path \
&& cat $ASPNETCORE_Kestrel__Certificates__Default__Path >> /etc/ssl/certs/ca-certificates.crt \
&& chmod 755 $ASPNETCORE_Kestrel__Certificates__Default__KeyPath && chmod +x $ASPNETCORE_Kestrel__Certificates__Default__KeyPath \
&& chown app:app $ASPNETCORE_Kestrel__Certificates__Default__KeyPath \
&& rm -rf /tmp && mkdir /tmp && chmod 755 /tmp && chown app:app /tmp \
&& update-ca-certificates \
&& rm -rf /config \
&& rm -rf /var/cache/apk/* \
&& mkdir /data && chmod 755 /data \
&& cat > /data/telebilbaoEpg.db \
&& chmod 777 /data/telebilbaoEpg.db \
&& chown -R app:app /data/*
ENTRYPOINT ["dotnet", "TelebilbaoEpg.dll"]
EXPOSE 80
EXPOSE 443

View File

@ -1,3 +1,6 @@
# Telebilbao-Epg # Telebilbao-Epg
Scraper with api for telebilbao epg Scraper with api for telebilbao epg
EPG information for this local TV station is available at https://www.telebilbao.es/programacion/
This program parses the information on that page and exposes it through an API.

View File

@ -0,0 +1,20 @@
using SQLite;
using System;
namespace TelebilbaoEpg.Database.Models
{
/// <summary>
/// A single scheduled programme, persisted as one row by sqlite-net.
/// </summary>
public class BroadCast
{
/// <summary>Database-generated primary key.</summary>
[PrimaryKey, AutoIncrement]
public int Id { get; set; }
/// <summary>Start date and time of the broadcast.</summary>
public DateTime From { get; set; }
/// <summary>End date and time of the broadcast.</summary>
public DateTime To { get; set; }
/// <summary>Programme name; defaults to an empty string.</summary>
public string Name { get; set; } = string.Empty;
/// <summary>Programme description; defaults to an empty string.</summary>
public string Description { get; set; } = string.Empty;
/// <summary>URL of the programme image; defaults to an empty string.</summary>
public string ImageUrl { get; set; } = string.Empty;
}
}

View File

@ -0,0 +1,26 @@
using SQLite;
using System.IO;
using TelebilbaoEpg.Database.Models;
namespace TelebilbaoEpg.Database.Repository
{
/// <summary>
/// Base class for repositories: opens the SQLite database, makes sure the
/// <see cref="BroadCast"/> table exists and closes the connection on disposal.
/// </summary>
/// <remarks>
/// The connection was previously never disposed. Implementing
/// <see cref="System.IDisposable"/> lets the owner of a repository instance
/// (for example a dependency-injection scope) release the underlying handle.
/// </remarks>
public abstract class BaseRepository : System.IDisposable
{
    /// <summary>Open connection shared by the derived repository.</summary>
    protected SQLiteConnection _db;

    private bool _disposed;

    /// <summary>
    /// Opens (creating it if needed) the database file and creates the
    /// <see cref="BroadCast"/> table when it does not exist yet.
    /// </summary>
    public BaseRepository()
    {
        var storeFile = "/data/telebilbaoEpg.db";
#if DEBUG
        // Debug builds keep the database next to the working directory.
        storeFile = storeFile.Replace("/data/", "");
#endif
        // Path.Combine returns a rooted second argument unchanged, so the
        // current directory only applies to the relative (DEBUG) file name.
        var databasePath = Path.Combine(Directory.GetCurrentDirectory(), storeFile);
        _db = new SQLiteConnection(databasePath);
        _db.CreateTable<BroadCast>();
    }

    /// <summary>Closes the database connection.</summary>
    public void Dispose()
    {
        Dispose(true);
        System.GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases the connection once; later calls are no-ops.
    /// </summary>
    /// <param name="disposing">
    /// <c>true</c> when called from <see cref="Dispose()"/>.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (disposing)
        {
            _db?.Dispose();
        }

        _disposed = true;
    }
}
}

View File

@ -0,0 +1,33 @@
using System;
using System.Collections.Generic;
using System.Linq;
using TelebilbaoEpg.Database.Models;
namespace TelebilbaoEpg.Database.Repository
{
/// <summary>
/// SQLite-backed implementation of <see cref="IBroadCastRepository"/>.
/// </summary>
/// <remarks>
/// The date filters are expressed as <see cref="DateTime"/> bounds and passed to
/// the table query, so only matching rows are materialised instead of loading
/// the whole table and filtering in memory.
/// </remarks>
public class BroadCastRepository : BaseRepository, IBroadCastRepository
{
    /// <summary>Inserts a new broadcast row.</summary>
    /// <param name="broadCast">The broadcast to persist.</param>
    public void Add(BroadCast broadCast)
    {
        _db.Insert(broadCast);
    }

    /// <summary>
    /// Returns the broadcasts that start or end on <paramref name="day"/>,
    /// ordered by start time.
    /// </summary>
    /// <param name="day">The calendar day to query.</param>
    public List<BroadCast> GetBroadCasts(DateOnly day)
    {
        // First and last representable instants of the requested day.
        var dayStart = day.ToDateTime(TimeOnly.MinValue);
        var dayEnd = day.ToDateTime(TimeOnly.MaxValue);

        return _db.Table<BroadCast>()
            .Where(b => (b.From >= dayStart && b.From <= dayEnd) || (b.To >= dayStart && b.To <= dayEnd))
            .ToList()
            // Sorted in memory to keep the stable ordering of the result.
            .OrderBy(b => b.From)
            .ToList();
    }

    /// <summary>
    /// Returns the broadcasts whose start or end date is on or after
    /// <paramref name="from"/> and whose start or end date is on or before
    /// <paramref name="to"/>, ordered by start time.
    /// </summary>
    /// <param name="from">First day of the range (inclusive).</param>
    /// <param name="to">Last day of the range (inclusive).</param>
    public List<BroadCast> GetBroadCasts(DateOnly from, DateOnly to)
    {
        // Day-granular comparisons translated to instant bounds:
        // date >= from  <=>  instant >= start of 'from'
        // date <= to    <=>  instant <= last instant of 'to'
        var rangeStart = from.ToDateTime(TimeOnly.MinValue);
        var rangeEnd = to.ToDateTime(TimeOnly.MaxValue);

        return _db.Table<BroadCast>()
            .Where(b => (b.From >= rangeStart || b.To >= rangeStart) && (b.From <= rangeEnd || b.To <= rangeEnd))
            .ToList()
            .OrderBy(b => b.From)
            .ToList();
    }
}
}

View File

@ -0,0 +1,15 @@
using System;
using System.Collections.Generic;
using TelebilbaoEpg.Database.Models;
namespace TelebilbaoEpg.Database.Repository
{
/// <summary>
/// Storage abstraction for <see cref="BroadCast"/> entries.
/// </summary>
public interface IBroadCastRepository
{
/// <summary>Returns the broadcasts for a single day.</summary>
/// <param name="day">The calendar day to query.</param>
List<BroadCast> GetBroadCasts(DateOnly day);
/// <summary>Returns the broadcasts for a range of days.</summary>
/// <param name="from">First day of the range.</param>
/// <param name="to">Last day of the range.</param>
List<BroadCast> GetBroadCasts(DateOnly from, DateOnly to);
/// <summary>Stores a new broadcast.</summary>
/// <param name="broadCast">The broadcast to store.</param>
void Add(BroadCast broadCast);
}
}

View File

@ -0,0 +1,12 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net8</TargetFramework>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="sqlite-net-pcl" Version="1.9.172" />
</ItemGroup>
</Project>

31
TelebilbaoEpg.sln Normal file
View File

@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.9.34728.123
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TelebilbaoEpg", "TelebilbaoEpg\TelebilbaoEpg.csproj", "{D9128F6D-C239-40E0-83DB-E98FEE81B5F8}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TelebilbaoEpg.Database", "TelebilbaoEpg.Database\TelebilbaoEpg.Database.csproj", "{ABDB2C86-77F1-4E3F-A224-F1066323BCDE}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{D9128F6D-C239-40E0-83DB-E98FEE81B5F8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D9128F6D-C239-40E0-83DB-E98FEE81B5F8}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D9128F6D-C239-40E0-83DB-E98FEE81B5F8}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D9128F6D-C239-40E0-83DB-E98FEE81B5F8}.Release|Any CPU.Build.0 = Release|Any CPU
{ABDB2C86-77F1-4E3F-A224-F1066323BCDE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{ABDB2C86-77F1-4E3F-A224-F1066323BCDE}.Debug|Any CPU.Build.0 = Debug|Any CPU
{ABDB2C86-77F1-4E3F-A224-F1066323BCDE}.Release|Any CPU.ActiveCfg = Release|Any CPU
{ABDB2C86-77F1-4E3F-A224-F1066323BCDE}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {CB4CE024-1BA3-404B-A8F8-E60361DE0384}
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,31 @@
using Microsoft.AspNetCore.Mvc;
using TelebilbaoEpg.Database.Models;
using TelebilbaoEpg.Database.Repository;
namespace TelebilbaoEpg.Controllers
{
/// <summary>
/// Read-only HTTP endpoints exposing the stored broadcasts.
/// </summary>
[ApiController]
[Route("api/[controller]")]
public class BroadCastController : ControllerBase
{
    private readonly IBroadCastRepository _repository;

    /// <summary>Creates the controller with the repository it reads from.</summary>
    /// <param name="broadCastRepository">Source of broadcast data.</param>
    public BroadCastController(IBroadCastRepository broadCastRepository)
        => _repository = broadCastRepository;

    /// <summary>Returns the broadcasts for the current local date.</summary>
    [HttpGet("today")]
    public List<BroadCast> GetToday()
        => _repository.GetBroadCasts(DateOnly.FromDateTime(DateTime.Now));

    /// <summary>Returns the broadcasts for the requested day range.</summary>
    /// <param name="from">First day of the range.</param>
    /// <param name="to">Last day of the range.</param>
    [HttpGet]
    public List<BroadCast> Get(DateOnly from, DateOnly to)
        => _repository.GetBroadCasts(from, to);
}
}

View File

@ -0,0 +1,567 @@
using HtmlAgilityPack;
using Quartz;
using System.Globalization;
using System.Linq;
using System.Text.RegularExpressions;
using System.Web;
using TableSpans.HtmlAgilityPack;
using TelebilbaoEpg.Database.Models;
using TelebilbaoEpg.Database.Repository;
namespace TelebilbaoEpg.Jobs
{
public class ScrapeJob : IJob
{
// Injected dependencies; assigned once in the constructor.
private readonly IConfiguration _configuration;
private readonly IBroadCastRepository _broadCastRepository;

/// <summary>Creates the job with its configuration and storage.</summary>
/// <param name="configuration">Provides the URLs to scrape.</param>
/// <param name="broadCastRepository">Repository the parsed broadcasts are saved to.</param>
public ScrapeJob(IConfiguration configuration, IBroadCastRepository broadCastRepository)
{
    _configuration = configuration;
    _broadCastRepository = broadCastRepository;
}
/// <summary>
/// Builds the list of time slots from the first column of the programme table.
/// </summary>
/// <param name="programTable">Table node whose <c>tbody/tr/td[1]</c> cells hold the start times.</param>
/// <returns>
/// One <see cref="TimeBlock"/> per accepted row, with <c>From</c> parsed from the cell
/// and <c>To</c> set to the start of the following accepted block; the last block's
/// <c>To</c> is set to the first block's <c>From</c>. Empty when no cells are found.
/// </returns>
private List<TimeBlock> GetTimeBlocks(HtmlNode programTable)
{
var ret = new List<TimeBlock>();
var timeBlocks = programTable.SelectNodes("tbody/tr/td[1]");
if (timeBlocks != null)
{
TimeOnly? previousTime = null;
// index counts every cell visited (row position); blockIndex counts accepted blocks only.
var index = 0;
var blockIndex = 0;
foreach (var node in timeBlocks)
{
if (!ret.Any(b => b.RowIndex == index))
{
var text = node.InnerText;
var currentBlock = new TimeBlock()
{
RowIndex = index,
BlockIndex = blockIndex,
};
if (!string.IsNullOrEmpty(text))
{
TimeOnly? parsedValue = null;
try
{
// Collapse a doubled colon before parsing.
var sanitizedtext = text.Replace("::", ":");
parsedValue = TimeOnly.Parse(sanitizedtext);
}
catch (FormatException)
{
// Fallback for times written as "hour.minute".
var sections = text.Split('.');
if (sections.Length == 2)
{
var hourSection = sections[0];
var minuteSection = sections[1];
if (!string.IsNullOrEmpty(hourSection) && !string.IsNullOrEmpty(minuteSection))
{
var hour = int.Parse(hourSection);
var minute = int.Parse(minuteSection);
parsedValue = new TimeOnly(hour, minute);
}
}
}
if (parsedValue.HasValue)
{
currentBlock.From = parsedValue.Value;
}
}
// Accept the block when no already-accepted block starts later than it.
var shouldAdd = !ret.Any(b => b.From > currentBlock.From);
if (!shouldAdd)
{
// A time earlier than the first accepted block is re-evaluated here:
// it is accepted when some accepted block starts after it.
var startDay = ret.First(b => b.BlockIndex == 0).From;
if (currentBlock.From < startDay)
{
shouldAdd = ret.Any(b => currentBlock.From < b.From);
}
}
// Cells without a parsable time are never added.
shouldAdd = shouldAdd && currentBlock.From.HasValue;
if (shouldAdd)
{
ret.Add(currentBlock);
if (previousTime.HasValue)
{
// Close the latest-row block that starts before this one.
var previousBlock = ret
.OrderByDescending(b => b.RowIndex)
.FirstOrDefault(b => b.From < currentBlock.From);
if (previousBlock != null)
{
previousBlock.To = currentBlock.From.Value;
}
}
previousTime = currentBlock.From;
blockIndex++;
}
}
index++;
}
// Wrap around: the block on the last row ends where the block on the first row starts.
var firstBlock = ret.OrderBy(b => b.RowIndex)
.FirstOrDefault();
var lastBlock = ret.OrderByDescending(b => b.RowIndex)
.FirstOrDefault();
if (firstBlock != null && lastBlock != null && firstBlock.From.HasValue)
{
lastBlock.To = firstBlock.From.Value;
}
}
return ret;
}
/// <summary>
/// Scrapes the weekly programme table, enriches the parsed broadcasts with the
/// description and image found on the station page, and stores the broadcasts
/// that are not saved yet.
/// </summary>
/// <param name="context">Quartz execution context (not used).</param>
/// <exception cref="JobExecutionException">
/// The programme page contains no table, or no broadcast could be parsed from it.
/// </exception>
public async Task Execute(IJobExecutionContext context)
{
    var tableScrapeUrl = _configuration.GetValue<string>("TableScrapeUrl");
    HtmlWeb hw = new HtmlWeb();
    HtmlDocument doc = hw.Load(tableScrapeUrl);

    var tableNode = doc.DocumentNode.SelectSingleNode("//table");
    if (tableNode == null)
    {
        throw new JobExecutionException($"No table element was found at '{tableScrapeUrl}'.");
    }

    var tableSpanExtension = new TableSpansExtension();
    var programTable = tableSpanExtension.ProcessTable(tableNode);
    var timeBlocks = GetTimeBlocks(programTable);

    // Week starts on Monday. DayOfWeek.Sunday is 0, so shift by 6 and wrap:
    // Monday -> 0 days back, Tuesday -> 1, ..., Sunday -> 6.
    var today = DateTime.Now.Date;
    var daysSinceMonday = ((int)today.DayOfWeek + 6) % 7;
    var startOfWeek = today.AddDays(-daysSinceMonday);

    // Column 1 holds the times; columns 2..8 hold Monday..Sunday.
    var dayColumnStart = 2;
    var dayColumnEnd = dayColumnStart + 7;
    var parsedBroadCasts = new List<BroadCast>();

    for (int dayIndex = dayColumnStart; dayIndex < dayColumnEnd; dayIndex++)
    {
        var programBlocks = programTable.SelectNodes($"tbody/tr/td[{dayIndex}]");
        if (programBlocks != null)
        {
            var day = startOfWeek.AddDays(dayIndex - dayColumnStart);
            // Row counter restarts for every day column.
            var rowIndex = 0;
            foreach (var programBlock in programBlocks)
            {
                var currentDay = day;
                var beginIndex = rowIndex;
                var rowSpan = 0;

                // Look the same cell up in the unprocessed document to read its rowspan.
                var rowPathIndex = programBlock.XPath.IndexOf("/tr");
                var xpath = $"//table/tbody{programBlock.XPath.Substring(rowPathIndex)}";
                var originalNode = doc.DocumentNode.SelectSingleNode(xpath);
                if (originalNode != null)
                {
                    if (originalNode.Attributes.Contains("rowspan"))
                    {
                        rowSpan = int.Parse(originalNode.Attributes["rowspan"].Value);
                    }
                }

                var broadCastsToAdd = new List<BroadCast>();
                TimeOnly? startTime = null;
                TimeOnly? endTime = null;

                // Time slot of this row, or the closest preceding slot when the row has none.
                var startBlock = timeBlocks.FirstOrDefault(b => b.RowIndex == beginIndex);
                if (startBlock == null)
                {
                    startBlock = timeBlocks.OrderByDescending(b => b.RowIndex)
                        .Where(b => b.RowIndex <= rowIndex + 1)
                        .FirstOrDefault();
                }
                if (startBlock != null)
                {
                    startTime = startBlock.From;
                    endTime = startBlock.To;
                }

                if (startTime.HasValue && endTime.HasValue)
                {
                    // Slots before 07:00 are attributed to the following calendar day.
                    if (startTime.Value.Hour < 7 || endTime.Value.Hour < 7)
                    {
                        currentDay = currentDay.AddDays(1);
                    }
                    var startDate = currentDay.AddTicks(startTime.Value.Ticks);
                    var endDate = currentDay.AddTicks(endTime.Value.Ticks);
                    var text = HttpUtility.HtmlDecode(programBlock.InnerText);
                    string timepattern = "(?:2[0-3]|[01]?[0-9])[:.][0-5]?[0-9]";
                    var needsSplitByTimePattern = Regex.IsMatch(text, timepattern);
                    var separator = "—";
                    var needsSplitBySeparator = text.Contains(separator);
                    var needsSplitByHorizontalRow = programBlock.SelectSingleNode("hr") != null;

                    if (needsSplitByTimePattern)
                    {
                        // The cell embeds a time: everything before it is the first
                        // programme, everything after it the second.
                        var match = Regex.Match(text, timepattern);
                        if (match.Success)
                        {
                            var firstProgramText = text.Substring(0, match.Index);
                            var secondProgramText = text.Substring(match.Index + match.Length);
                            // The pattern also accepts "HH.MM"; normalise it so parsing
                            // does not depend on '.' being accepted as a separator.
                            var splitTime = TimeOnly.Parse(match.Value.Replace('.', ':'));
                            var splitDate = currentDay.AddTicks(splitTime.Ticks);
                            if (!string.IsNullOrEmpty(firstProgramText))
                            {
                                var firstProgram = new BroadCast()
                                {
                                    From = startDate,
                                    To = splitDate,
                                    Name = SanitizeText(firstProgramText),
                                };
                                broadCastsToAdd.Add(firstProgram);
                            }
                            if (!string.IsNullOrEmpty(secondProgramText))
                            {
                                var secondProgram = new BroadCast()
                                {
                                    From = splitDate,
                                    To = endDate,
                                    Name = SanitizeText(secondProgramText),
                                };
                                broadCastsToAdd.Add(secondProgram);
                            }
                        }
                    }
                    else if (needsSplitBySeparator)
                    {
                        var separatorIndex = text.IndexOf(separator);
                        var endBlock = timeBlocks.FirstOrDefault(b => b.RowIndex == beginIndex + rowSpan);
                        if (endBlock == null)
                        {
                            endBlock = timeBlocks
                                .OrderByDescending(b => b.RowIndex)
                                .Where(b => beginIndex + rowSpan > b.RowIndex)
                                .FirstOrDefault();
                        }
                        if (endBlock != null && endBlock.To.HasValue)
                        {
                            var blockStartDate = startDate;
                            var blockEndtime = endBlock.To.Value;
                            var blockEndDate = currentDay.AddTicks(blockEndtime.Ticks);
                            var duration = blockEndDate - blockStartDate;
                            // TotalMinutes in both branches: the Minutes component alone
                            // drops whole hours from the duration.
                            var splitDate = rowSpan > 0
                                ? blockStartDate.AddMinutes((int)duration.TotalMinutes / rowSpan)
                                : blockStartDate.AddMinutes((int)duration.TotalMinutes / 2);
                            var firstProgramText = string.Empty;
                            var secondProgramText = string.Empty;
                            if (separatorIndex > 0)
                            {
                                firstProgramText = text.Substring(0, separatorIndex);
                                // Skip the separator itself; SanitizeText cuts at the first
                                // separator, so keeping it would always yield an empty name.
                                secondProgramText = text.Substring(separatorIndex + separator.Length);
                            }
                            else
                            {
                                secondProgramText = text.Replace(separator, "");
                            }
                            var firstProgramName = SanitizeText(firstProgramText);
                            var secondProgramName = SanitizeText(secondProgramText);
                            if (!string.IsNullOrEmpty(firstProgramName))
                            {
                                var firstProgram = new BroadCast()
                                {
                                    From = startDate,
                                    To = splitDate,
                                    Name = firstProgramName,
                                };
                                broadCastsToAdd.Add(firstProgram);
                            }
                            if (!string.IsNullOrEmpty(secondProgramName) && splitDate <= endDate)
                            {
                                var secondProgram = new BroadCast()
                                {
                                    From = splitDate,
                                    To = endDate,
                                    Name = secondProgramName,
                                };
                                broadCastsToAdd.Add(secondProgram);
                            }
                        }
                    }
                    else if (needsSplitByHorizontalRow)
                    {
                        // Each non-empty <strong>/<p> child is a programme; the slot is
                        // divided evenly between them.
                        var textNodes = new List<HtmlNode>();
                        var nodeCollection = programBlock.SelectNodes("strong");
                        if (nodeCollection != null)
                        {
                            textNodes.AddRange(nodeCollection.Where(n => !string.IsNullOrEmpty(n.InnerText)).ToList());
                        }
                        nodeCollection = programBlock.SelectNodes("p");
                        if (nodeCollection != null)
                        {
                            textNodes.AddRange(nodeCollection.Where(n => !string.IsNullOrEmpty(n.InnerText)).ToList());
                        }
                        var nodeCount = textNodes.Count;
                        if (nodeCount > 0)
                        {
                            var endBlock = timeBlocks.FirstOrDefault(b => b.RowIndex == beginIndex + rowSpan);
                            if (endBlock == null)
                            {
                                endBlock = timeBlocks
                                    .OrderByDescending(b => b.RowIndex)
                                    .Where(b => beginIndex + rowSpan > b.RowIndex)
                                    .FirstOrDefault();
                            }
                            if (endBlock != null && endBlock.To.HasValue)
                            {
                                var blockStartDate = startDate;
                                var blockEndtime = endBlock.To.Value;
                                var blockEndDate = currentDay.AddTicks(blockEndtime.Ticks);
                                var duration = blockEndDate - blockStartDate;
                                var itemDuration = duration.TotalMinutes / nodeCount;
                                for (int i = 0; i < nodeCount; i++)
                                {
                                    var nodeStartDate = blockStartDate.AddMinutes(i * itemDuration);
                                    var nodeEndDate = nodeStartDate.AddMinutes(itemDuration);
                                    var node = textNodes[i];
                                    var nodeText = node.InnerText;
                                    var currentText = SanitizeText(nodeText);
                                    var currentBroadcast = new BroadCast()
                                    {
                                        From = nodeStartDate,
                                        To = nodeEndDate,
                                        Name = currentText,
                                    };
                                    broadCastsToAdd.Add(currentBroadcast);
                                }
                            }
                        }
                    }
                    else
                    {
                        var name = SanitizeText(text);
                        var broadCast = new BroadCast()
                        {
                            From = startDate,
                            To = endDate,
                            Name = name,
                        };
                        broadCastsToAdd.Add(broadCast);
                    }

                    // Merge with what was parsed so far: a broadcast with the same name
                    // that ends at or after this start is extended instead of duplicated.
                    foreach (var item in broadCastsToAdd)
                    {
                        var add = !parsedBroadCasts.Any(b => b.To >= item.From && b.Name.Equals(item.Name)) && !parsedBroadCasts.Any(b => b.From == item.From && b.To == item.To);
                        if (add)
                        {
                            parsedBroadCasts.Add(item);
                        }
                        else
                        {
                            var broadCastToUpdate = parsedBroadCasts.FirstOrDefault(b => b.To >= item.From && b.Name.Equals(item.Name));
                            if (broadCastToUpdate != null)
                            {
                                broadCastToUpdate.To = item.To;
                            }
                        }
                    }
                }
                rowIndex++;
            }
        }
    }

    // Min/Max below require at least one element; fail with a clear message instead.
    if (parsedBroadCasts.Count == 0)
    {
        throw new JobExecutionException($"No broadcasts could be parsed from '{tableScrapeUrl}'.");
    }

    // Second pass: collect title, description and image of each programme
    // listed on the station page.
    var stationProgramInformationUrl = _configuration.GetValue<string>("StationProgramInformationUrl");
    var programDocument = hw.Load(stationProgramInformationUrl);
    var parsedPrograms = new List<ProgramItem>();
    var titleNodeCollection = programDocument.DocumentNode.SelectNodes("//h2[contains(@class, 'programa_title')]");
    if (titleNodeCollection != null)
    {
        foreach (var titleNode in titleNodeCollection)
        {
            var title = SanitizeText(titleNode.InnerText);
            var description = string.Empty;
            var imageUrl = string.Empty;
            // The wrapper three levels above the title is searched for the image
            // and description blocks.
            var programWrapper = titleNode.ParentNode.ParentNode.ParentNode;
            var imageWrapper = programWrapper.SelectSingleNode("div[contains(@class, 'wpb_single_image')]");
            if (imageWrapper != null)
            {
                var imagenode = imageWrapper.SelectSingleNode("figure/div/img");
                if (imagenode != null)
                {
                    var attributeName = "src";
                    imageUrl = imagenode.Attributes.Contains(attributeName) ? imagenode.Attributes[attributeName].Value : imageUrl;
                }
            }
            var descriptionNode = programWrapper.SelectSingleNode("div[contains(@class, 'vc_row-o-content-bottom')]");
            if (descriptionNode != null)
            {
                description = SanitizeText(descriptionNode.InnerText);
            }
            if (!string.IsNullOrEmpty(title) && !string.IsNullOrEmpty(description))
            {
                var program = new ProgramItem
                {
                    Description = description,
                    Name = title,
                    ImageUrl = imageUrl,
                };
                parsedPrograms.Add(program);
            }
        }
    }

    // Attach description and image to broadcasts whose name matches a programme.
    foreach (var broadcast in parsedBroadCasts)
    {
        var program = parsedPrograms.FirstOrDefault(p => p.Name == broadcast.Name);
        if (program != null)
        {
            broadcast.Description = program.Description;
            broadcast.ImageUrl = program.ImageUrl;
        }
    }

    // Only store broadcasts that are not already saved for the parsed period.
    var startSaveDate = parsedBroadCasts.Min(x => x.From);
    var endSaveDate = parsedBroadCasts.Max(x => x.To);
    var savedBroadCasts = _broadCastRepository.GetBroadCasts(DateOnly.FromDateTime(startSaveDate), DateOnly.FromDateTime(endSaveDate));
    foreach (var broadcast in parsedBroadCasts)
    {
        var shouldSave = !savedBroadCasts.Any(b => b.From == broadcast.From && b.To == broadcast.To && b.Name == broadcast.Name);
        if (shouldSave)
        {
            _broadCastRepository.Add(broadcast);
        }
    }
}
/// <summary>
/// Normalises scraped text into a programme name.
/// </summary>
/// <param name="text">Raw text; may be null or empty.</param>
/// <returns>
/// The text lower-cased and then title-cased with the current culture, trimmed, with
/// line feeds replaced by spaces and cut off at the first "—" separator; an empty
/// string when <paramref name="text"/> is null or empty.
/// </returns>
private string SanitizeText(string text)
{
var ret = string.Empty;
if (!string.IsNullOrEmpty(text))
{
// Proper lower and upper case formatting.
ret = CultureInfo.CurrentCulture.TextInfo.ToTitleCase(text.ToLower()).Trim();
// NOTE(review): the second Replace shows two visually identical arguments here —
// confirm whether the first one is a double or non-breaking space.
ret = ret.Replace("\n", " ").Replace(" ", " ");
// Keep only the part before the separator, if there is one.
var separatorIndex = ret.IndexOf("—");
if (separatorIndex > -1)
{
ret = ret.Substring(0, separatorIndex).Trim();
}
}
return ret;
}
/// <summary>
/// A time slot derived from one row of the programme table's time column.
/// </summary>
public class TimeBlock
{
/// <summary>Start time of the slot; null when the cell held no parsable time.</summary>
public TimeOnly? From { get; set; }
/// <summary>End time of the slot; null until a following slot (or the wrap-around) sets it.</summary>
public TimeOnly? To { get; set; }
/// <summary>Zero-based position of the cell among the time-column cells.</summary>
public int RowIndex { get; set; }
/// <summary>Zero-based position among the accepted slots.</summary>
public int BlockIndex { get; set; }
}
/// <summary>
/// Programme details scraped from the station page.
/// </summary>
public class ProgramItem
{
    /// <summary>Programme title.</summary>
    public string Name { get; set; } = string.Empty;

    /// <summary>Programme description; never null.</summary>
    public string Description { get; set; } = string.Empty;

    /// <summary>URL of the programme image; empty when none was found.</summary>
    public string ImageUrl { get; set; } = string.Empty;
}
}
}

64
TelebilbaoEpg/Program.cs Normal file
View File

@ -0,0 +1,64 @@
using Quartz;
using TelebilbaoEpg.Database.Repository;
using TelebilbaoEpg.Jobs;
var builder = WebApplication.CreateBuilder(args);

// Add services to the container.
builder.Services.AddControllers();
// Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle
builder.Services.AddEndpointsApiExplorer();
builder.Services.AddSwaggerGen();

// Quartz runs as a hosted service; running jobs are allowed to finish on shutdown.
builder.Services.AddQuartz();
builder.Services.AddQuartzHostedService(opt =>
{
    opt.WaitForJobsToComplete = true;
});

builder.Logging.ClearProviders();
builder.Logging.AddConsole();
builder.Services.AddScoped<IBroadCastRepository, BroadCastRepository>();

var app = builder.Build();

// Configure the HTTP request pipeline.
// Swagger is served in every environment (there is no Development-only check).
app.UseSwagger();
app.UseSwaggerUI();
app.UseHttpsRedirection();
app.UseAuthorization();
app.MapControllers();

// Fail fast with a clear message when the cron expression is not configured,
// instead of passing null on to the trigger builder.
var jobSchedule = app.Configuration.GetValue<string>("Quartz:JobSchedule");
if (string.IsNullOrWhiteSpace(jobSchedule))
{
    throw new InvalidOperationException("Configuration value 'Quartz:JobSchedule' is missing; it must contain a Quartz cron expression.");
}

var schedulerFactory = app.Services.GetRequiredService<ISchedulerFactory>();
var scheduler = await schedulerFactory.GetScheduler();

// Define the scrape job and schedule it with the configured cron expression.
var job = JobBuilder.Create<ScrapeJob>()
    .Build();
var trigger = TriggerBuilder.Create()
    .WithIdentity("Cron trigger", "Scrape")
    .StartNow()
    .WithCronSchedule(jobSchedule)
    .Build();
await scheduler.ScheduleJob(job, trigger);

app.Run();

View File

@ -0,0 +1,52 @@
{
"profiles": {
"http": {
"commandName": "Project",
"launchBrowser": true,
"launchUrl": "swagger",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
},
"dotnetRunMessages": true,
"applicationUrl": "http://localhost:5242"
},
"https": {
"commandName": "Project",
"launchBrowser": true,
"launchUrl": "swagger",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
},
"dotnetRunMessages": true,
"applicationUrl": "https://localhost:7077;http://localhost:5242"
},
"IIS Express": {
"commandName": "IISExpress",
"launchBrowser": true,
"launchUrl": "swagger",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
}
},
"Container (Dockerfile)": {
"commandName": "Docker",
"launchBrowser": true,
"launchUrl": "{Scheme}://{ServiceHost}:{ServicePort}/swagger",
"environmentVariables": {
"ASPNETCORE_HTTPS_PORTS": "8081",
"ASPNETCORE_HTTP_PORTS": "8080"
},
"publishAllPorts": true,
"useSSL": true
}
},
"$schema": "http://json.schemastore.org/launchsettings.json",
"iisSettings": {
"windowsAuthentication": false,
"anonymousAuthentication": true,
"iisExpress": {
"applicationUrl": "http://localhost:19838",
"sslPort": 44365
}
}
}

View File

@ -0,0 +1,26 @@
<Project Sdk="Microsoft.NET.Sdk.Web">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<RootNamespace>Telebilbap_Epg</RootNamespace>
<UserSecretsId>3104d886-ec84-40b1-8f80-9a0670a7d2f3</UserSecretsId>
<DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="HtmlAgilityPack" Version="1.11.61" />
<PackageReference Include="Microsoft.VisualStudio.Azure.Containers.Tools.Targets" Version="1.20.1" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0" />
<PackageReference Include="Quartz" Version="3.8.1" />
<PackageReference Include="Quartz.Extensions.DependencyInjection" Version="3.8.1" />
<PackageReference Include="Quartz.Extensions.Hosting" Version="3.8.1" />
<PackageReference Include="TableSpans.HtmlAgilityPack" Version="1.0.1" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\TelebilbaoEpg.Database\TelebilbaoEpg.Database.csproj" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,13 @@
{
  "Logging": {
    "LogLevel": {
      "Default": "Information",
      "Microsoft.AspNetCore": "Warning"
    }
  },
  "Quartz": {
    "JobSchedule": "0 0/1 * * * ?"
  },
  "TableScrapeUrl": "https://www.telebilbao.es/programacion/",
  "StationProgramInformationUrl": "https://www.telebilbao.es/"
}

View File

@ -0,0 +1,14 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft.AspNetCore": "Warning"
}
},
"AllowedHosts": "*",
"Quartz": {
"JobSchedule": "0 0/30 * * * ?"
},
"TableScrapeUrl": "https://www.telebilbao.es/programacion/",
"StationProgramInformationUrl": "https://www.telebilbao.es/"
}