const { isURL } = require('validator');
const { EXACT_TRACKER_QUERY_PARAMS, PREFIX_TRACKER_QUERY_PARAMS } = require('./constants');

const isURLOptions = { allow_underscores: true, allow_trailing_dot: true };

/**
 * Build an anchored regular expression from a pattern in which `*` stands for
 * "any run of characters" and every other character is matched literally.
 *
 * @param {string} str - wildcard pattern, e.g. `utm_*`
 * @returns {RegExp} expression that must match the whole candidate string
 */
function wildcardToRegex(str) {
  const literalPieces = [];
  for (const piece of str.split('*')) {
    // Neutralize regex metacharacters so each piece only matches itself.
    literalPieces.push(piece.replace(/([.*+?^=!:${}()|\[\]\/\\])/g, '\\$1'));
  }
  const pattern = literalPieces.join('.*');
  return new RegExp(`^${pattern}$`);
}

/**
 * Extract the query parameters of a URI exactly as written, without any
 * percent-decoding.
 *
 * The query is everything between the first `?` and the first following `#`.
 * Each `&`-separated pair is split on its FIRST `=` only, so values that
 * themselves contain `=` (e.g. base64 padding) are kept intact. When a key
 * occurs several times the last occurrence wins.
 *
 * @param {string} uri - raw URI or URL string
 * @returns {Map<string, string|undefined>} raw key -> raw value; the value is
 *   `undefined` for a key written without `=`
 */
const getRawSearchParams = (uri) => {
  const rawParams = new Map();

  const queryStart = uri.indexOf('?');
  if (queryStart === -1) {
    return rawParams;
  }

  // Slice by index instead of split(sep, 2): split with a limit silently drops
  // everything after the second separator.
  const afterQuestionMark = uri.slice(queryStart + 1);
  const hashStart = afterQuestionMark.indexOf('#');
  const rawSearch = hashStart === -1 ? afterQuestionMark : afterQuestionMark.slice(0, hashStart);

  for (const pair of rawSearch.split('&')) {
    if (!pair) {
      continue;
    }
    const separatorIndex = pair.indexOf('=');
    if (separatorIndex === -1) {
      rawParams.set(pair, undefined);
    } else {
      rawParams.set(pair.slice(0, separatorIndex), pair.slice(separatorIndex + 1));
    }
  }

  return rawParams;
};

/**
 * Static helpers for decoding, normalizing, validating and canonicalizing URLs.
 *
 * Methods prefixed with `_` are internal building blocks of `normalizeUri` /
 * `normalizeUriComponent`.
 */
class UriLib {
  /**
   * Apply `decodeURIComponent` repeatedly until the value stops changing.
   *
   * @param {string} encodedURI
   * @returns {string} fully decoded value, or the input unchanged if any decode step throws
   */
  static decodeURIComponent(encodedURI) {
    return UriLib._decodedURIWithFunc(encodedURI, decodeURIComponent);
  }

  /**
   * Apply `decodeURI` repeatedly until the value stops changing.
   *
   * @param {string} encodedURI
   * @returns {string} fully decoded value, or the input unchanged if any decode step throws
   */
  static decodeURI(encodedURI) {
    return UriLib._decodedURIWithFunc(encodedURI, decodeURI);
  }

  /**
   * Single-pass `decodeURIComponent` that never throws.
   *
   * @param {string} uri
   * @returns {string} decoded value, or `uri` unchanged when it is malformed
   */
  static safeDecodeURIComponent(uri) {
    try {
      return decodeURIComponent(uri);
    } catch {
      return uri;
    }
  }

  /**
   * Single-pass `decodeURI` that never throws.
   *
   * @param {string} uri
   * @returns {string} decoded value, or `uri` unchanged when it is malformed
   */
  static safeDecodeURI(uri) {
    try {
      return decodeURI(uri);
    } catch {
      return uri;
    }
  }

  /**
   * Run `decodeFunc` to a fixed point (handles double/triple-encoded input).
   *
   * @param {string} encodedURI
   * @param {(value: string) => string} decodeFunc
   * @returns {string} the stable decoded value; if any pass throws, the ORIGINAL
   *   input is returned and intermediate progress is discarded
   */
  static _decodedURIWithFunc(encodedURI, decodeFunc) {
    let uri = encodedURI;

    try {
      for (;;) {
        const decodedURI = decodeFunc(uri);
        if (decodedURI === uri) {
          return uri;
        }
        uri = decodedURI;
      }
    } catch {
      return encodedURI;
    }
  }

  /**
   * Strip a single leading `/` and decode the URI according to the per-site
   * rules in `settings`. Checks are applied in order: do-not-decode, decode
   * once with `decodeURI`, decode once with `decodeURIComponent`, otherwise
   * `decodeFunc`.
   *
   * @param {string} uri
   * @param {(value: string) => string} decodeFunc - default decoder
   * @param {object|undefined} settings
   * @returns {string}
   */
  static _decodeUriBeforeNormalize(uri, decodeFunc, settings) {
    const realUrl = uri.replace(/^\//, '');

    if (UriLib.isDoNotDecodeUri(realUrl, settings?.doNotDecodeUri)) {
      return realUrl;
    }
    if (UriLib.isUseOnlyOneDecodeUri(realUrl, settings?.onlyOneDecodeUri)) {
      return UriLib.safeDecodeURI(realUrl);
    }
    if (UriLib.isUseOnlyOneDecodeUriComponent(realUrl, settings?.onlyOneDecodeUriComponent)) {
      return UriLib.safeDecodeURIComponent(realUrl);
    }

    return decodeFunc(realUrl);
  }

  /**
   * Shared pipeline behind `normalizeUri` and `normalizeUriComponent`:
   * decode -> normalize -> optional raw-param restore -> optional bracket
   * encoding -> validation -> optional query sorting.
   *
   * @param {string} uri
   * @param {(value: string) => string} decodeFunc - default decoder for this entry point
   * @param {object|undefined} settings
   * @returns {string}
   * @throws {Error} 'URL is invalid' when the result fails `isURL`
   */
  static _normalizeWithDecoder(uri, decodeFunc, settings) {
    const decodedURI = UriLib._decodeUriBeforeNormalize(uri, decodeFunc, settings);
    let normalizedUri = UriLib._normalizeUri(decodedURI, uri, settings);
    if (settings?.restoreSearchParams) {
      normalizedUri = UriLib.restoreSearchParams(normalizedUri, uri);
    }
    if (settings?.encodedBrackets) {
      normalizedUri = UriLib.encodeBrackets(normalizedUri);
    }
    if (!isURL(normalizedUri, isURLOptions)) {
      throw new Error('URL is invalid');
    }
    if (settings?.sortQueryParameters) {
      normalizedUri = UriLib.sortQueryParameters(normalizedUri);
    }
    return normalizedUri;
  }

  /**
   * Normalize a URL, decoding it with `decodeURI` semantics by default.
   *
   * @param {string} uri
   * @param {object|undefined} settings - normalize settings
   * @param {Array<string>} settings.doNotDecodeUri - do not call decode uri for these urls
   * @param {Array<string>} settings.onlyOneDecodeUri - call decodeURI once for these urls
   * @param {Array<string>} settings.onlyOneDecodeUriComponent - call decodeURIComponent once for these urls
   * @param {Array<string>} settings.toLowerCase - make these urls lower case
   * @param {Array<string>} settings.urlMustBeDecoded - force decode url
   * @param {Array<string>} settings.useP20InQuery - use %20 (instead of +) in the query of these urls
   * @param {boolean} settings.restoreSearchParams - restore search params to the original
   * @param {boolean} settings.encodedBrackets - leave brackets encoded
   * @param {Array<string>|undefined} settings.ignoredQueryParameters - list of ignored parameters from url_parameters table with ignore: true
   * @param {Array<string>|undefined} settings.whitelistQueryParameters - list of whitelist parameters from url_parameters table with ignore: false
   * @param {boolean} settings.ignoreAllQueryParameters - remove all query parameters
   * @param {boolean} settings.sortQueryParameters - sort query parameters
   * @returns {string} the normalized URL
   * @throws {Error} 'URL is invalid' when the normalized value is not a valid URL
   */
  static normalizeUri(uri, settings) {
    return UriLib._normalizeWithDecoder(uri, UriLib.decodeURI, settings);
  }

  /**
   * Normalize a URL, decoding it with `decodeURIComponent` semantics by default.
   *
   * @param {string} uri
   * @param {object|undefined} settings - normalize settings
   * @param {Array<string>} settings.doNotDecodeUri - do not call decode uri for these urls
   * @param {Array<string>} settings.onlyOneDecodeUri - call decodeURI once for these urls
   * @param {Array<string>} settings.onlyOneDecodeUriComponent - call decodeURIComponent once for these urls
   * @param {Array<string>} settings.toLowerCase - make these urls lower case
   * @param {Array<string>} settings.urlMustBeDecoded - force decode url
   * @param {Array<string>} settings.useP20InQuery - use %20 (instead of +) in the query of these urls
   * @param {boolean} settings.restoreSearchParams - restore search params to the original
   * @param {boolean} settings.encodedBrackets - leave brackets encoded
   * @param {Array<string>|undefined} settings.ignoredQueryParameters - list of ignored parameters from url_parameters table with ignore: true
   * @param {Array<string>|undefined} settings.whitelistQueryParameters - list of whitelist parameters from url_parameters table with ignore: false
   * @param {boolean} settings.ignoreAllQueryParameters - remove all query parameters
   * @param {boolean} settings.sortQueryParameters - sort query parameters
   * @returns {string} the normalized URL
   * @throws {Error} 'URL is invalid' when the normalized value is not a valid URL
   */
  static normalizeUriComponent(uri, settings) {
    return UriLib._normalizeWithDecoder(uri, UriLib.decodeURIComponent, settings);
  }

  /**
   * @param {string} uri
   * @param {Array<string>} [sites] - substrings to look for; defaults to a built-in list
   * @returns {boolean} true when `uri` contains any of the substrings
   */
  static isDoNotDecodeUri(uri, sites) {
    const DO_NOT_DECODE_FOR_SITES = ['wix.com'];
    return (sites || DO_NOT_DECODE_FOR_SITES).some((s) => uri.includes(s));
  }

  /**
   * @param {string} uri
   * @param {Array<string>} [sites] - substrings to look for; defaults to a built-in list
   * @returns {boolean} true when the once-decoded `uri` contains any of the substrings
   */
  static isUseOnlyOneDecodeUriComponent(uri, sites) {
    const ONLY_DECODE_URI_COMPONENTS_FOR_SITES = ['https://one.workfront.com/s/document-item'];
    // Decode once up front rather than once per candidate site.
    const decodedUri = UriLib.safeDecodeURIComponent(uri);
    return (sites || ONLY_DECODE_URI_COMPONENTS_FOR_SITES).some((value) => decodedUri.includes(value));
  }

  /**
   * @param {string} uri
   * @param {Array<string>} [sites] - substrings to look for; defaults to a built-in list
   * @returns {boolean} true when `uri` contains any of the substrings
   */
  static isUseOnlyOneDecodeUri(uri, sites) {
    const ONLY_DECODE_URI_FOR_SITES = [
      'insight.com',
      'skf.com',
      'odesk.com',
      'combatgent.com',
      'drizly.com',
      'upwork.com',
      'swiftontap.com',
      'azotelibrary.com',
    ];
    return (sites || ONLY_DECODE_URI_FOR_SITES).some((value) => uri.includes(value));
  }

  /**
   * Core normalization of an already-decoded URL: optional lower-casing, hash
   * handling, `_escaped_fragment_` conversion, tracker/ignored/whitelisted
   * query parameter removal, NUL stripping and optional `%20` restoration.
   *
   * @param {string} uri - decoded URL
   * @param {string} rawUri - the URL as originally received
   * @param {object|undefined} settings
   * @returns {string}
   * @throws {TypeError} when the URL cannot be parsed by `new URL()`
   */
  static _normalizeUri(uri, rawUri, settings) {
    let decodedURI = uri;

    if (UriLib.needToLowerCaseUrl(uri, settings?.toLowerCase)) {
      decodedURI = uri.toLowerCase();
    }

    decodedURI = UriLib.encodeHash(decodedURI);
    decodedURI = UriLib.encodeHashBang(decodedURI);
    decodedURI = UriLib.removeEscapedFragment(decodedURI);
    decodedURI = UriLib.removeKnownTrackerQueryParams(decodedURI);

    if (settings) {
      if (settings.ignoreAllQueryParameters) {
        decodedURI = UriLib.deleteQueryParameters(decodedURI);
      } else if (settings.whitelistQueryParameters) {
        // NOTE(review): an empty array is truthy, so an empty whitelist takes this
        // branch (a no-op) and `ignoredQueryParameters` is not applied — confirm intended.
        decodedURI = UriLib.whitelistUserQueryParameters(decodedURI, settings.whitelistQueryParameters);
      } else {
        decodedURI = UriLib.removeUserIgnoredQueryParameters(decodedURI, settings.ignoredQueryParameters ?? []);
      }
    }

    // seeing this on some urls and postgres doesnt like it; strip every occurrence
    decodedURI = decodedURI.replace(/\u0000/g, '');
    if (UriLib.isUrlMustBeDecoded(decodedURI, settings?.urlMustBeDecoded)) {
      decodedURI = UriLib.decodeURI(decodedURI);
    }
    // new URL() after query manipulation uses + for space characters, for some users it should be %20
    if (UriLib.isUseP20InQuery(decodedURI, settings?.useP20InQuery) && UriLib.isRawUriUsesP20InQuery(rawUri)) {
      decodedURI = UriLib.replacePlusToP20InQuery(decodedURI);
    }
    return decodedURI;
  }

  /**
   * Compile a list of parameter-name patterns into a single predicate.
   * Entries containing `*` are treated as wildcards, all others match exactly.
   * Patterns are compiled once here instead of once per query parameter.
   *
   * @param {Array<string>} patterns
   * @returns {(name: string) => boolean}
   */
  static _buildParamNameMatcher(patterns) {
    const exactNames = new Set();
    const wildcardRegexes = [];
    for (const pattern of patterns) {
      if (pattern.includes('*')) {
        wildcardRegexes.push(wildcardToRegex(pattern));
      } else {
        exactNames.add(pattern);
      }
    }
    return (name) => exactNames.has(name) || wildcardRegexes.some((regexp) => regexp.test(name));
  }

  /**
   * Remove every query parameter whose name matches one of `ignoredParameters`
   * (exact name or `*` wildcard).
   *
   * @param {string} uri - absolute URL
   * @param {Array<string>} [ignoredParameters]
   * @returns {string} serialized URL
   * @throws {TypeError} when `uri` cannot be parsed by `new URL()`
   */
  static removeUserIgnoredQueryParameters(uri, ignoredParameters = []) {
    const parsedUrl = new URL(uri);
    const isIgnored = UriLib._buildParamNameMatcher(ignoredParameters);

    // Collect first, delete afterwards: never mutate searchParams while iterating it.
    const namesToDelete = new Set([...parsedUrl.searchParams.keys()].filter((name) => isIgnored(name)));
    for (const name of namesToDelete) {
      parsedUrl.searchParams.delete(name);
    }

    return parsedUrl.toString();
  }

  /**
   * Keep only the query parameters whose name matches AT LEAST ONE entry of
   * `whitelistQueryParameters` (exact name or `*` wildcard); remove the rest.
   * An empty whitelist leaves the query untouched.
   *
   * @param {string} uri - absolute URL
   * @param {Array<string>} [whitelistQueryParameters]
   * @returns {string} serialized URL
   * @throws {TypeError} when `uri` cannot be parsed by `new URL()`
   */
  static whitelistUserQueryParameters(uri, whitelistQueryParameters = []) {
    const parsedUrl = new URL(uri);
    if (whitelistQueryParameters.length === 0) {
      return parsedUrl.toString();
    }

    const isWhitelisted = UriLib._buildParamNameMatcher(whitelistQueryParameters);

    // A parameter is dropped only when NO whitelist entry matches it.
    const namesToDelete = new Set([...parsedUrl.searchParams.keys()].filter((name) => !isWhitelisted(name)));
    for (const name of namesToDelete) {
      parsedUrl.searchParams.delete(name);
    }

    return parsedUrl.toString();
  }

  /**
   * Drop the entire query string.
   *
   * @param {string} uri - absolute URL
   * @returns {string} serialized URL without a query
   * @throws {TypeError} when `uri` cannot be parsed by `new URL()`
   */
  static deleteQueryParameters(uri) {
    const parsedUrl = new URL(uri);

    parsedUrl.search = '';

    return parsedUrl.toString();
  }

  /**
   * Remove query parameters listed in `EXACT_TRACKER_QUERY_PARAMS` or whose
   * name starts with one of `PREFIX_TRACKER_QUERY_PARAMS`.
   *
   * @param {string} uri - absolute URL
   * @returns {string} serialized URL
   * @throws {TypeError} when `uri` cannot be parsed by `new URL()`
   */
  static removeKnownTrackerQueryParams(uri) {
    const parsedUrl = new URL(uri);

    const namesToDelete = new Set();
    for (const name of parsedUrl.searchParams.keys()) {
      if (EXACT_TRACKER_QUERY_PARAMS.has(name)) {
        namesToDelete.add(name);
        continue;
      }
      for (const prefix of PREFIX_TRACKER_QUERY_PARAMS) {
        if (name.startsWith(prefix)) {
          namesToDelete.add(name);
          break;
        }
      }
    }

    for (const name of namesToDelete) {
      parsedUrl.searchParams.delete(name);
    }

    return parsedUrl.toString();
  }

  /**
   * @param {string} uri
   * @param {Array<string>} [sites] - substrings to look for; defaults to a built-in list
   * @returns {boolean} true when `uri` contains any of the substrings
   */
  static needToLowerCaseUrl(uri, sites) {
    const TO_LOWER_CASE_SITES = ['marcopromos.com'];
    return (sites || TO_LOWER_CASE_SITES).some((value) => uri.includes(value));
  }

  /**
   * Encode a # for a non #! URL so that we access it correctly: when the URL
   * contains no `#!`, every `#` becomes `%23`. URLs containing `#!` are
   * returned unchanged.
   *
   * @param {string} uri
   * @returns {string}
   */
  static encodeHash(uri) {
    if (!uri.includes('#!') && uri.includes('#')) {
      return uri.replace(/#/g, '%23');
    }
    return uri;
  }

  /**
   * If the decoded url has two query params from a decoded escaped fragment
   * for hashbang URLs, turn the LAST `?` into `&` so both parts form one query.
   *
   * @param {string} uri
   * @returns {string}
   */
  static encodeHashBang(uri) {
    const lastQuestionMark = uri.lastIndexOf('?');
    if (uri.indexOf('?') === lastQuestionMark) {
      return uri;
    }
    return `${uri.slice(0, lastQuestionMark)}&${uri.slice(lastQuestionMark + 1)}`;
  }

  /**
   * Convert an `_escaped_fragment_` query parameter back into a `#!` hash and
   * remove it (plus any empty-named parameter) from the query.
   *
   * @param {string} uri - absolute URL
   * @returns {string} serialized URL
   * @throws {TypeError} when `uri` cannot be parsed by `new URL()`
   */
  static removeEscapedFragment(uri) {
    const parsedUrl = new URL(uri);
    // Remove the _escaped_fragment_ query parameter
    const escapedFragment = parsedUrl.searchParams.get('_escaped_fragment_');
    if (escapedFragment !== null) {
      if (escapedFragment) {
        parsedUrl.hash = `#!${escapedFragment}`;
      }
      parsedUrl.searchParams.delete('_escaped_fragment_');
    }

    // Bing was seen accessing a URL like /?&_escaped_fragment_=
    parsedUrl.searchParams.delete('');

    // Drops the first remaining (case-insensitive) occurrence of the marker, if any.
    return parsedUrl.toString().replace(/_escaped_fragment_/i, '');
  }

  /**
   * Reorder `base?query#hash` into `base#hash?query`.
   *
   * The input is returned unchanged when it has no `?`, no `#`, or when the
   * first `#` comes before the first `?` (nothing to reorder).
   *
   * @param {string} uri
   * @returns {string}
   */
  static fixHashBangUrl(uri) {
    const queryStart = uri.indexOf('?');
    const hashStart = uri.indexOf('#');
    // Guard the -1 / out-of-order cases; slicing with them produces duplicated garbage.
    if (queryStart === -1 || hashStart === -1 || hashStart < queryStart) {
      return uri;
    }
    return uri.slice(0, queryStart) + uri.slice(hashStart) + uri.slice(queryStart, hashStart);
  }

  /**
   * @param {string} uri
   * @param {Array<string>} [sites] - substrings to look for; defaults to a built-in list
   * @returns {boolean} true when `uri` contains any of the substrings
   */
  static isUrlMustBeDecoded(uri, sites) {
    const URL_MUST_BE_DECODED_SITES = ['www.pryorcashman.com'];
    return (sites || URL_MUST_BE_DECODED_SITES).some((value) => uri.includes(value));
  }

  /**
   * @param {string} uri
   * @param {Array<string>} [sites] - substrings to look for; defaults to a built-in list
   * @returns {boolean} true when the once-decoded `uri` contains any of the substrings
   */
  static isUseP20InQuery(uri, sites) {
    const USE_P20_IN_QUERY_SITES = ['xfinity.com'];
    // Decode once up front rather than once per candidate site.
    const decodedUri = UriLib.safeDecodeURIComponent(uri);
    return (sites || USE_P20_IN_QUERY_SITES).some((value) => decodedUri.includes(value));
  }

  /**
   * @param {string} rawUri
   * @returns {boolean} true when the part after the first `?` contains `%20` and no `+`
   */
  static isRawUriUsesP20InQuery(rawUri) {
    const queryStart = rawUri.indexOf('?');
    if (queryStart === -1) {
      return false;
    }
    const search = rawUri.slice(queryStart + 1);
    return search.includes('%20') && !search.includes('+');
  }

  /**
   * Replace every `+` after the first `?` with `%20`. Nothing after a later
   * `?` is dropped.
   *
   * @param {string} encodedURI
   * @returns {string}
   */
  static replacePlusToP20InQuery(encodedURI) {
    const queryStart = encodedURI.indexOf('?');
    if (queryStart === -1) {
      return encodedURI;
    }
    const url = encodedURI.slice(0, queryStart);
    const searchP20 = encodedURI.slice(queryStart + 1).replace(/\+/g, '%20');
    return `${url}?${searchP20}`;
  }

  /**
   * Rebuild the query of `uri` using the raw (undecoded) values found in
   * `original` for the same parameter names.
   *
   * - name present in `original` with a value: the raw value is used;
   * - name present in `original` without `=`: only the name is emitted;
   * - name absent from `original`: the current value is kept (component-encoded)
   *   instead of emitting the literal text `undefined`.
   *
   * NOTE(review): the raw lookup is a key -> value Map, so repeated names all
   * receive the same raw value.
   *
   * @param {string} uri - absolute, normalized URL
   * @param {string|URL} original - the URL as originally received
   * @returns {string} serialized URL
   * @throws {TypeError} when `uri` cannot be parsed by `new URL()`
   */
  static restoreSearchParams(uri, original) {
    const decodedURI = new URL(uri);
    const originalSearchParams = getRawSearchParams(original.toString());

    const searchParams = [];
    decodedURI.searchParams.forEach((value, key) => {
      if (!originalSearchParams.has(key)) {
        searchParams.push(`${key}=${encodeURIComponent(value)}`);
        return;
      }
      const rawValue = originalSearchParams.get(key);
      searchParams.push(rawValue === undefined ? key : `${key}=${rawValue}`);
    });
    if (searchParams.length !== 0) {
      decodedURI.search = `?${searchParams.join('&')}`;
    }
    return decodedURI.toString();
  }

  /**
   * Percent-encode round brackets: `(` -> `%28`, `)` -> `%29`.
   *
   * @param {string} uri
   * @returns {string}
   */
  static encodeBrackets(uri) {
    return uri.replace(/\(/g, '%28').replace(/\)/g, '%29');
  }

  /**
   * @param {string} url
   * @returns {boolean} whether `encodeURI(url)` passes `isURL`
   */
  static isValidUrl(url) {
    return isURL(encodeURI(url), isURLOptions);
  }

  /**
   * Collapse runs of `&` into a single `&`.
   *
   * @param {string} url
   * @returns {string}
   */
  static sanitizeQueryParameterSeparator(url) {
    return url.replace(/&{2,}/g, '&');
  }

  /**
   * Decode a raw query key for ordering purposes only (`+` means space).
   *
   * @param {string} rawKey
   * @returns {string}
   */
  static _decodeQueryKey(rawKey) {
    return UriLib.safeDecodeURIComponent(rawKey.replace(/\+/g, ' '));
  }

  /**
   * Sort the query parameters of `url` by name (stable, `localeCompare`).
   *
   * The raw text of every parameter is preserved — values are never decoded
   * and re-emitted, so `%26`, `+`, `%20` etc. survive unchanged. A parameter
   * with an empty value is emitted as the bare key (`a=` -> `a`). A `#fragment`
   * following the query stays at the end. Input with fewer than two parameters
   * is returned unchanged.
   *
   * Input without `?` is treated as a bare query string (result is prefixed
   * with `?`), unless it looks like an absolute URL (`scheme://...`), in which
   * case it has no query and is returned unchanged.
   *
   * @param {string} url
   * @returns {string}
   */
  static sortQueryParameters(url) {
    const queryStart = url.indexOf('?');
    if (queryStart === -1 && /^[a-z][a-z\d+.-]*:\/\//i.test(url)) {
      return url;
    }

    const baseUrl = queryStart === -1 ? '' : url.slice(0, queryStart);
    const afterQuestionMark = queryStart === -1 ? url : url.slice(queryStart + 1);

    // Keep a trailing fragment out of the sort so it cannot end up mid-query.
    const hashStart = afterQuestionMark.indexOf('#');
    const queryString = hashStart === -1 ? afterQuestionMark : afterQuestionMark.slice(0, hashStart);
    const fragment = hashStart === -1 ? '' : afterQuestionMark.slice(hashStart);

    const params = queryString
      .split('&')
      .filter((segment) => segment !== '')
      .map((segment) => {
        const separatorIndex = segment.indexOf('=');
        const rawKey = separatorIndex === -1 ? segment : segment.slice(0, separatorIndex);
        const rawValue = separatorIndex === -1 ? '' : segment.slice(separatorIndex + 1);
        return {
          sortKey: UriLib._decodeQueryKey(rawKey),
          // Empty value -> key only; otherwise the untouched raw `key=value` text.
          text: rawValue === '' ? rawKey : segment,
        };
      });

    if (params.length < 2) {
      return url;
    }

    params.sort((param1, param2) => param1.sortKey.localeCompare(param2.sortKey));

    const sortedQueryString = params.map((param) => param.text).join('&');

    return `${baseUrl}?${sortedQueryString}${fragment}`;
  }
}

module.exports = UriLib;
