New: Support JSON parsing in Cardigann

Co-Authored-By: mikeoscar2006 <89641725+mikeoscar2006@users.noreply.github.com>
This commit is contained in:
Qstick 2021-10-30 16:19:22 -05:00
parent 93deb56e8e
commit 2c0c6aa158
6 changed files with 531 additions and 351 deletions

View file

@ -26,7 +26,7 @@ namespace NzbDrone.Core.IndexerVersions
{
/* Update Service will fall back if version # does not exist for an indexer per Ta */
private const int DEFINITION_VERSION = 2;
private const int DEFINITION_VERSION = 3;
private readonly List<string> _defintionBlocklist = new List<string>()
{
"aither",

View file

@ -141,7 +141,7 @@ namespace NzbDrone.Core.Indexers.Cardigann
return element.QuerySelector(selector);
}
protected string HandleSelector(SelectorBlock selector, IElement dom, Dictionary<string, object> variables = null)
protected string HandleSelector(SelectorBlock selector, IElement dom, Dictionary<string, object> variables = null, bool required = true)
{
if (selector.Text != null)
{
@ -163,9 +163,14 @@ namespace NzbDrone.Core.Indexers.Cardigann
}
if (selection == null)
{
if (required)
{
throw new Exception(string.Format("Selector \"{0}\" didn't match {1}", selector.Selector, dom.ToHtmlPretty()));
}
return null;
}
}
if (selector.Remove != null)
@ -188,17 +193,27 @@ namespace NzbDrone.Core.Indexers.Cardigann
}
if (value == null)
{
if (required)
{
throw new Exception(string.Format("None of the case selectors \"{0}\" matched {1}", string.Join(",", selector.Case), selection.ToHtmlPretty()));
}
return null;
}
}
else if (selector.Attribute != null)
{
value = selection.GetAttribute(selector.Attribute);
if (value == null)
{
if (required)
{
throw new Exception(string.Format("Attribute \"{0}\" is not set for element {1}", selector.Attribute, selection.ToHtmlPretty()));
}
return null;
}
}
else
{
@ -208,6 +223,57 @@ namespace NzbDrone.Core.Indexers.Cardigann
return ApplyFilters(ParseUtil.NormalizeSpace(value), selector.Filters, variables);
}
/// <summary>
/// Resolves a selector block against a JSON token and returns the resulting
/// string after the selector's filters have been applied.
/// </summary>
/// <param name="selector">Selector definition; its Text, Selector, Case and Filters members are consulted.</param>
/// <param name="parentObj">JSON token that the selector path is evaluated against.</param>
/// <param name="variables">Template variables handed to the Go-template expansion and to the filters.</param>
/// <param name="required">When true a failed lookup throws; when false it returns null instead.</param>
/// <returns>The filtered value, or null when nothing matched and <paramref name="required"/> is false.</returns>
/// <exception cref="Exception">
/// Thrown when <paramref name="required"/> is true and either the selector path matches no token,
/// or the value is null and no case entry (including "*") applies.
/// </exception>
protected string HandleJsonSelector(SelectorBlock selector, JToken parentObj, Dictionary<string, object> variables = null, bool required = true)
{
    // A literal text template short-circuits any lookup in the JSON document.
    if (selector.Text != null)
    {
        return ApplyFilters(ApplyGoTemplateText(selector.Text, variables), selector.Filters, variables);
    }

    string value = null;

    if (selector.Selector != null)
    {
        // Leading dots are stripped before the path is expanded and passed to SelectToken.
        var selector_Selector = ApplyGoTemplateText(selector.Selector.TrimStart('.'), variables);
        var selection = parentObj.SelectToken(selector_Selector);
        if (selection == null)
        {
            if (required)
            {
                throw new Exception(string.Format("Selector \"{0}\" didn't match {1}", selector_Selector, parentObj.ToString()));
            }

            return null;
        }

        value = selection.Value<string>();
    }

    if (selector.Case != null)
    {
        foreach (var jcase in selector.Case)
        {
            // value is still null when no selector path was configured, so compare with the
            // null-safe static Equals; calling value.Equals(...) here threw a
            // NullReferenceException before the "*" wildcard could be considered.
            if (string.Equals(value, jcase.Key) || jcase.Key.Equals("*"))
            {
                value = jcase.Value;
                break;
            }
        }

        // NOTE(review): a non-null value that matches no case entry is passed through unchanged,
        // exactly as before; only a null value reaches this error path. Confirm that is intended.
        if (value == null)
        {
            if (required)
            {
                throw new Exception(string.Format("None of the case selectors \"{0}\" matched {1}", string.Join(",", selector.Case), parentObj.ToString()));
            }

            return null;
        }
    }

    return ApplyFilters(ParseUtil.NormalizeSpace(value), selector.Filters, variables);
}
protected Dictionary<string, object> GetBaseTemplateVariables()
{
var indexerLogging = _configService.LogIndexerResponse;

View file

@ -150,13 +150,15 @@ namespace NzbDrone.Core.Indexers.Cardigann
{
public int After { get; set; }
public SelectorBlock Dateheaders { get; set; }
public SelectorBlock Count { get; set; }
}
/// <summary>
/// A search path entry of a definition: a request description plus
/// search-specific options.
/// </summary>
public class SearchPathBlock : RequestBlock
{
    // Categories associated with this path.
    // NOTE(review): presumably restricts the path to searches in these categories; confirm against the request generator.
    public List<string> Categories { get; set; }

    // Defaults to true.
    // NOTE(review): presumably controls whether the search-level inputs are merged into this path; confirm.
    public bool Inheritinputs { get; set; } = true;

    // Declared exactly once; the stale pre-change declaration that explicitly assigned false
    // duplicated this member. The default of a bool auto-property is already false.
    public bool Followredirect { get; set; }

    // Optional description of the response format; the parser takes its JSON branch
    // when Response is set and Response.Type equals "json".
    public ResponseBlock Response { get; set; }
}
public class RequestBlock
@ -194,4 +196,12 @@ namespace NzbDrone.Core.Indexers.Cardigann
{
public SelectorField Pathselector { get; set; }
}
// Describes how the response of a search path is to be parsed.
public class ResponseBlock
{
// Response format. The parser uses its JSON code path when this equals "json";
// any other value falls through to the HTML parser.
public string Type { get; set; }
// Optional token path evaluated on every row; when set, the token it selects
// (rather than the row itself) is the object the fields are read from.
public string Attribute { get; set; }
// When true, each JObject value of the selected token is parsed as its own
// release; when false the selected token is parsed as a single release.
public bool Multiple { get; set; }
// If the raw response body equals this text (an empty message matches an
// empty body), the parser returns no releases without parsing the body.
public string NoResultsMessage { get; set; }
}
}

View file

@ -5,6 +5,7 @@ using System.Net;
using System.Text.RegularExpressions;
using AngleSharp.Dom;
using AngleSharp.Html.Parser;
using Newtonsoft.Json.Linq;
using NLog;
using NzbDrone.Common.Extensions;
using NzbDrone.Core.Configuration;
@ -55,6 +56,107 @@ namespace NzbDrone.Core.Indexers.Cardigann
var searchUrlUri = new Uri(request.Url.FullUri);
if (request.SearchPath.Response != null && request.SearchPath.Response.Type.Equals("json"))
{
if (request.SearchPath.Response != null && request.SearchPath.Response.NoResultsMessage != null && (request.SearchPath.Response.NoResultsMessage.Equals(results) || (request.SearchPath.Response.NoResultsMessage == string.Empty && results == string.Empty)))
{
return releases;
}
var parsedJson = JToken.Parse(results);
if (parsedJson == null)
{
throw new Exception("Error Parsing Json Response");
}
if (search.Rows.Count != null)
{
var countVal = HandleJsonSelector(search.Rows.Count, parsedJson, variables);
if (int.TryParse(countVal, out var count))
{
if (count < 1)
{
return releases;
}
}
}
var rowsObj = parsedJson.SelectToken(search.Rows.Selector);
if (rowsObj == null)
{
throw new Exception("Error Parsing Rows Selector");
}
foreach (var row in rowsObj.Value<JArray>())
{
var selObj = request.SearchPath.Response.Attribute != null ? row.SelectToken(request.SearchPath.Response.Attribute).Value<JToken>() : row;
var mulRows = request.SearchPath.Response.Multiple == true ? selObj.Values<JObject>() : new List<JObject> { selObj.Value<JObject>() };
foreach (var mulRow in mulRows)
{
var release = new TorrentInfo();
foreach (var field in search.Fields)
{
var fieldParts = field.Key.Split('|');
var fieldName = fieldParts[0];
var fieldModifiers = new List<string>();
for (var i = 1; i < fieldParts.Length; i++)
{
fieldModifiers.Add(fieldParts[i]);
}
string value = null;
var variablesKey = ".Result." + fieldName;
var isOptional = OptionalFields.Contains(field.Key) || fieldModifiers.Contains("optional") || field.Value.Optional;
try
{
var parentObj = mulRow;
if (field.Value.Selector != null && field.Value.Selector.StartsWith(".."))
{
parentObj = row.Value<JObject>();
}
value = HandleJsonSelector(field.Value, parentObj, variables, !isOptional);
if (isOptional && string.IsNullOrWhiteSpace(value))
{
variables[variablesKey] = null;
continue;
}
variables[variablesKey] = ParseFields(value, fieldName, release, fieldModifiers, searchUrlUri);
}
catch (Exception ex)
{
if (!variables.ContainsKey(variablesKey))
{
variables[variablesKey] = null;
}
if (isOptional)
{
variables[variablesKey] = null;
continue;
}
throw new Exception(string.Format("Error while parsing field={0}, selector={1}, value={2}: {3}", field.Key, field.Value.Selector, value ?? "<null>", ex.Message));
}
var filters = search.Rows.Filters;
var skipRelease = ParseRowFilters(filters, release, variables, row);
if (skipRelease)
{
continue;
}
}
releases.Add(release);
}
}
}
else
{
try
{
var searchResultParser = new HtmlParser();
@ -120,9 +222,128 @@ namespace NzbDrone.Core.Indexers.Cardigann
string value = null;
var variablesKey = ".Result." + fieldName;
var isOptional = OptionalFields.Contains(field.Key) || fieldModifiers.Contains("optional") || field.Value.Optional;
try
{
value = HandleSelector(field.Value, row, variables);
value = HandleSelector(field.Value, row, variables, !isOptional);
if (isOptional && string.IsNullOrWhiteSpace(value))
{
variables[variablesKey] = null;
continue;
}
variables[variablesKey] = ParseFields(value, fieldName, release, fieldModifiers, searchUrlUri);
}
catch (Exception ex)
{
if (!variables.ContainsKey(variablesKey))
{
variables[variablesKey] = null;
}
if (OptionalFields.Contains(field.Key) || fieldModifiers.Contains("optional") || field.Value.Optional)
{
variables[variablesKey] = null;
continue;
}
if (indexerLogging)
{
_logger.Trace("Error while parsing field={0}, selector={1}, value={2}: {3}", field.Key, field.Value.Selector, value == null ? "<null>" : value, ex.Message);
}
}
}
var filters = search.Rows.Filters;
var skipRelease = ParseRowFilters(filters, release, variables, row);
if (skipRelease)
{
continue;
}
// if DateHeaders is set go through the previous rows and look for the header selector
var dateHeaders = _definition.Search.Rows.Dateheaders;
if (release.PublishDate == DateTime.MinValue && dateHeaders != null)
{
var prevRow = row.PreviousElementSibling;
string value = null;
if (prevRow == null)
{
// continue with parent
var parent = row.ParentElement;
if (parent != null)
{
prevRow = parent.PreviousElementSibling;
}
}
while (prevRow != null)
{
var curRow = prevRow;
_logger.Debug(prevRow.OuterHtml);
try
{
value = HandleSelector(dateHeaders, curRow);
break;
}
catch (Exception)
{
// do nothing
}
prevRow = curRow.PreviousElementSibling;
if (prevRow == null)
{
// continue with parent
var parent = curRow.ParentElement;
if (parent != null)
{
prevRow = parent.PreviousElementSibling;
}
}
}
if (value == null && dateHeaders.Optional == false)
{
throw new Exception(string.Format("No date header row found for {0}", release.ToString()));
}
if (value != null)
{
release.PublishDate = DateTimeUtil.FromUnknown(value);
}
}
releases.Add(release);
}
catch (Exception ex)
{
_logger.Error(ex, "CardigannIndexer ({0}): Error while parsing row '{1}':\n\n{2}", _definition.Id, row.ToHtmlPretty());
}
}
}
catch (Exception)
{
// OnParseError(results, ex);
throw;
}
}
/*
if (query.Limit > 0)
{
releases = releases.Take(query.Limit).ToList();
}*/
_logger.Debug($"Got {releases.Count} releases");
return releases;
}
private string ParseFields(string value, string fieldName, TorrentInfo release, List<string> fieldModifiers, Uri searchUrlUri)
{
switch (fieldName)
{
case "download":
@ -313,30 +534,13 @@ namespace NzbDrone.Core.Indexers.Cardigann
break;
}
variables[variablesKey] = value;
}
catch (Exception ex)
{
if (!variables.ContainsKey(variablesKey))
{
variables[variablesKey] = null;
return value;
}
if (OptionalFields.Contains(field.Key) || fieldModifiers.Contains("optional") || field.Value.Optional)
private bool ParseRowFilters(List<FilterBlock> filters, ReleaseInfo release, Dictionary<string, object> variables, object row)
{
variables[variablesKey] = null;
continue;
}
if (indexerLogging)
{
_logger.Trace("Error while parsing field={0}, selector={1}, value={2}: {3}", field.Key, field.Value.Selector, value == null ? "<null>" : value, ex.Message);
}
}
}
var filters = search.Rows.Filters;
var skipRelease = false;
if (filters != null)
{
foreach (var filter in filters)
@ -350,35 +554,12 @@ namespace NzbDrone.Core.Indexers.Cardigann
characterLimit = int.Parse(filter.Args);
}
/*
if (query.ImdbID != null && TorznabCaps.SupportsImdbMovieSearch)
{
break; // skip andmatch filter for imdb searches
}
if (query.TmdbID != null && TorznabCaps.SupportsTmdbMovieSearch)
{
break; // skip andmatch filter for tmdb searches
}
if (query.TvdbID != null && TorznabCaps.SupportsTvdbSearch)
{
break; // skip andmatch filter for tvdb searches
}
var queryKeywords = variables[".Keywords"] as string;
if (!query.MatchQueryStringAND(release.Title, characterLimit, queryKeywords))
{
_logger.Debug(string.Format("CardigannIndexer ({0}): skipping {1} (andmatch filter)", _definition.Id, release.Title));
skipRelease = true;
}
*/
break;
case "strdump":
// for debugging
_logger.Debug(string.Format("CardigannIndexer ({0}): row strdump: {1}", _definition.Id, row.ToHtmlPretty()));
_logger.Debug(string.Format("CardigannIndexer ({0}): row strdump: {1}", _definition.Id, row.ToString()));
break;
default:
_logger.Error(string.Format("CardigannIndexer ({0}): Unsupported rows filter: {1}", _definition.Id, filter.Name));
@ -387,87 +568,7 @@ namespace NzbDrone.Core.Indexers.Cardigann
}
}
if (skipRelease)
{
continue;
}
// if DateHeaders is set go through the previous rows and look for the header selector
var dateHeaders = _definition.Search.Rows.Dateheaders;
if (release.PublishDate == DateTime.MinValue && dateHeaders != null)
{
var prevRow = row.PreviousElementSibling;
string value = null;
if (prevRow == null)
{
// continue with parent
var parent = row.ParentElement;
if (parent != null)
{
prevRow = parent.PreviousElementSibling;
}
}
while (prevRow != null)
{
var curRow = prevRow;
_logger.Debug(prevRow.OuterHtml);
try
{
value = HandleSelector(dateHeaders, curRow);
break;
}
catch (Exception)
{
// do nothing
}
prevRow = curRow.PreviousElementSibling;
if (prevRow == null)
{
// continue with parent
var parent = curRow.ParentElement;
if (parent != null)
{
prevRow = parent.PreviousElementSibling;
}
}
}
if (value == null && dateHeaders.Optional == false)
{
throw new Exception(string.Format("No date header row found for {0}", release.ToString()));
}
if (value != null)
{
release.PublishDate = DateTimeUtil.FromUnknown(value);
}
}
releases.Add(release);
}
catch (Exception ex)
{
_logger.Error(ex, "CardigannIndexer ({0}): Error while parsing row '{1}':\n\n{2}", _definition.Id, row.ToHtmlPretty());
}
}
}
catch (Exception)
{
// OnParseError(results, ex);
throw;
}
/*
if (query.Limit > 0)
{
releases = releases.Take(query.Limit).ToList();
}*/
_logger.Debug($"Got {releases.Count} releases");
return releases;
return skipRelease;
}
}
}

View file

@ -6,17 +6,20 @@ namespace NzbDrone.Core.Indexers.Cardigann
/// <summary>
/// An indexer request that also carries the template variables and the search
/// path definition it was built from.
/// </summary>
public class CardigannRequest : IndexerRequest
{
    // Template variables supplied when the request was created.
    public Dictionary<string, object> Variables { get; private set; }

    // Search path definition the request was created for; the response parser reads
    // SearchPath.Response to decide how the body is parsed.
    public SearchPathBlock SearchPath { get; private set; }

    /// <summary>Creates a request from a URL and accept header.</summary>
    /// <param name="url">Request URL, passed to the base class.</param>
    /// <param name="httpAccept">Accept header, passed to the base class.</param>
    /// <param name="variables">Template variables stored on the request.</param>
    /// <param name="searchPath">Search path definition stored on the request.</param>
    public CardigannRequest(string url, HttpAccept httpAccept, Dictionary<string, object> variables, SearchPathBlock searchPath)
        : base(url, httpAccept)
    {
        Variables = variables;
        SearchPath = searchPath;
    }

    /// <summary>Creates a request around an already-built HTTP request.</summary>
    /// <param name="httpRequest">HTTP request, passed to the base class.</param>
    /// <param name="variables">Template variables stored on the request.</param>
    /// <param name="searchPath">Search path definition stored on the request.</param>
    public CardigannRequest(HttpRequest httpRequest, Dictionary<string, object> variables, SearchPathBlock searchPath)
        : base(httpRequest)
    {
        Variables = variables;
        SearchPath = searchPath;
    }
}
}

View file

@ -1067,7 +1067,7 @@ namespace NzbDrone.Core.Indexers.Cardigann
}
}
var request = new CardigannRequest(requestbuilder.Build(), variables);
var request = new CardigannRequest(requestbuilder.Build(), variables, searchPath);
// send HTTP request
if (search.Headers != null)