torrentio/src/producer/Features/DataProcessing/DapperDataStorage.cs
iPromKnight 95fa48c851 Woke up to see a discussion about torrentio scraping: powered by community
Was a little inspired. Now that we have a self-populating database of IMDb ids, why shouldn't we have the ability to scrape any other instance of torrentio, or knightcrawler?

Also restructured the producer to be vertically sliced to make it easier to work with
There was too much flicking back and forth between Jobs and Crawlers when configuring.
2024-03-02 18:41:57 +00:00

namespace Producer.Features.DataProcessing;
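// Dapper/Npgsql-backed implementation of IDataStorage: persists ingested torrents and crawled page URLs in Postgres.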
public class DapperDataStorage(PostgresConfiguration configuration, RabbitMqConfiguration rabbitConfig, ILogger<DapperDataStorage> logger) : IDataStorage
{
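// Batch insert for scraped torrents; ON CONFLICT (source, info_hash) DO NOTHING silently skips hashes already ingested from the same source.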
private const string InsertTorrentSql =
"""
INSERT INTO ingested_torrents (name, source, category, info_hash, size, seeders, leechers, imdb, processed, "createdAt", "updatedAt")
VALUES (@Name, @Source, @Category, @InfoHash, @Size, @Seeders, @Leechers, @Imdb, @Processed, @CreatedAt, @UpdatedAt)
ON CONFLICT (source, info_hash) DO NOTHING
""";
private const string InsertIngestedPageSql =
"""
INSERT INTO ingested_pages (url, "createdAt", "updatedAt")
VALUES (@Url, @CreatedAt, @UpdatedAt)
""";
private const string GetMovieAndSeriesTorrentsNotProcessedSql =
"""
SELECT
id as "Id",
name as "Name",
source as "Source",
category as "Category",
info_hash as "InfoHash",
size as "Size",
seeders as "Seeders",
leechers as "Leechers",
imdb as "Imdb",
processed as "Processed",
"createdAt" as "CreatedAt",
"updatedAt" as "UpdatedAt"
FROM ingested_torrents
WHERE processed = false AND category != 'xxx'
""";
private const string UpdateProcessedSql =
"""
UPDATE ingested_torrents
SET
processed = true,
"updatedAt" = @UpdatedAt
WHERE id = @Id
""";
public async Task<InsertTorrentResult> InsertTorrents(IReadOnlyCollection<Torrent> torrents, CancellationToken cancellationToken = default)
{
try
{
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
await connection.OpenAsync(cancellationToken);
var inserted = await connection.ExecuteAsync(InsertTorrentSql, torrents);
return new(true, inserted);
}
catch (Exception e)
{
return new(false, 0, e.Message);
}
}
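// Loads unprocessed torrents and trims the result to the configured RabbitMQ batch size; on error the exception is logged and an empty list is returned.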
public async Task<IReadOnlyCollection<Torrent>> GetPublishableTorrents(CancellationToken cancellationToken = default)
{
try
{
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
await connection.OpenAsync(cancellationToken);
var torrents = await connection.QueryAsync<Torrent>(GetMovieAndSeriesTorrentsNotProcessedSql);
return torrents.Take(rabbitConfig.MaxPublishBatchSize).ToList();
}
catch (Exception e)
{
logger.LogError(e, "Error while getting publishable torrents from database");
return new List<Torrent>();
}
}
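// Stamps every torrent with the current UTC time, then runs UpdateProcessedSql once per torrent.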
public async Task<UpdatedTorrentResult> SetTorrentsProcessed(IReadOnlyCollection<Torrent> torrents, CancellationToken cancellationToken = default)
{
try
{
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
await connection.OpenAsync(cancellationToken);
foreach (var torrent in torrents)
{
torrent.UpdatedAt = DateTime.UtcNow;
}
var updated = await connection.ExecuteAsync(UpdateProcessedSql, torrents);
return new(true, updated);
}
catch (Exception e)
{
return new(false, 0, e.Message);
}
}
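// True when the given page URL already exists in ingested_pages.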
public async Task<bool> PageIngested(string pageId, CancellationToken cancellationToken = default)
{
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
await connection.OpenAsync(cancellationToken);
const string query = "SELECT EXISTS (SELECT 1 FROM ingested_pages WHERE url = @Url)";
var result = await connection.ExecuteScalarAsync<bool>(query, new { Url = pageId });
return result;
}
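// Inserts the page URL with matching created/updated timestamps; any failure comes back as an unsuccessful result carrying the exception message.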
public async Task<PageIngestedResult> MarkPageAsIngested(string pageId, CancellationToken cancellationToken = default)
{
await using var connection = new NpgsqlConnection(configuration.StorageConnectionString);
await connection.OpenAsync(cancellationToken);
try
{
var date = DateTime.UtcNow;
await connection.ExecuteAsync(InsertIngestedPageSql, new
{
Url = pageId,
CreatedAt = date,
UpdatedAt = date,
});
return new(true, "Page successfully marked as ingested");
}
catch (Exception e)
{
return new(false, $"Failed to mark page as ingested: {e.Message}");
}
}
}
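
For context, a minimal sketch of how a publishing job could drive this storage class. Only the IDataStorage methods shown above come from this file; the PublishTorrentsJob name and the RabbitMQ publish step are assumptions for illustration.

public class PublishTorrentsJob(IDataStorage storage, ILogger<PublishTorrentsJob> logger)
{
    public async Task Execute(CancellationToken cancellationToken = default)
    {
        // Pull the next batch of unprocessed torrents (already capped at MaxPublishBatchSize).
        var torrents = await storage.GetPublishableTorrents(cancellationToken);

        if (torrents.Count == 0)
        {
            return;
        }

        // ... publish the batch to RabbitMQ here (hypothetical publisher step) ...

        // Mark the batch as processed so the next run does not pick it up again.
        await storage.SetTorrentsProcessed(torrents, cancellationToken);

        logger.LogInformation("Published {Count} torrents", torrents.Count);
    }
}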