All files / src parser.ts

97.05% Statements 66/68
88.57% Branches 31/35
100% Functions 6/6
97.05% Lines 66/68

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173  4x 4x   4x 4x                 4x             4x 240x 240x 4x     236x                   4x 226x 170x   56x 56x         6x 6x 6x       6x 6x 6x 2x       6x       222x 222x   4x                           4x       198x   198x 222x 222x 222x   222x   222x 192x   222x 222x 222x 222x   218x 4x     214x 24x 20x     210x 210x 210x   210x 210x 52x     210x 210x 210x       210x   210x   56x     210x 4x         206x 8x           198x                       174x   2x 4x       4x         174x    
import { ParseRoutesOptions, Protocol, Route, RouteParam } from './types'
import { validateProtocol } from './validation'
import { InvalidPatternError } from './errors'
 
// Label substituted for a leading `*` in a hostname so the pattern can be handed to URL().
const WILDCARD_HOSTNAME_PLACEHOLDER = 'wildcard'
// Scheme/authority separator; a route containing it past index 0 is treated as having an explicit protocol.
const PROTOCOL_SEPARATOR = '://'
 
/** The three textual pieces produced by `splitUrlInput`. */
export type UrlParts = {
  /** Scheme including the trailing `://`, e.g. `https://`. */
  protocolPrefix: string
  /**
   * Text between `://` and the first `/`, `?` or `#`. May be empty and may start with `*`.
   * It is not split further, so any userinfo or port present in the input stays in here.
   */
  hostname: string
  /** Whatever follows the hostname: path, query and fragment. May be empty. */
  rest: string
}

// (protocol)://(hostname)(rest including path, query, fragment)
// The hostname group ends at the first `/`, `?` or `#`, so a query or fragment that
// directly follows the host (no path) lands in `rest` instead of leaking into the hostname.
const URL_PARTS_PATTERN = /^([a-z0-9+\-.]+:\/\/)([^/?#]*)(.*)$/i

/**
 * Splits a URL-like string into protocol prefix, hostname and the remainder without
 * using the URL() constructor.
 *
 * The reason we parse manually instead of just passing the input to URL() is that the
 * URL() constructor in the browser cannot handle wildcards like `*.example.com`.
 * This is not obvious, since new URL('*.example.com') works in Node.
 *
 * @param urlInput - String expected to start with `<scheme>://`.
 * @returns The split parts, or `null` when the input does not start with a `<scheme>://` prefix.
 */
export function splitUrlInput(urlInput: string): UrlParts | null {
  const match = URL_PARTS_PATTERN.exec(urlInput)
  if (!match) {
    return null
  }

  const [, protocolPrefix, hostname, rest] = match
  return { protocolPrefix, hostname, rest }
}
 
/**
 * Rewrites a URL whose hostname begins with `*` so that the URL() constructor accepts it:
 * the leading `*` is swapped for a fixed placeholder label and the URL is reassembled
 * from its parts.
 *
 * @param urlInput - The original URL string; returned untouched when no rewrite is needed.
 * @param urlParts - The parts previously split out of `urlInput`, or `null` if splitting failed.
 * @returns `urlInput` as-is when there are no parts or the hostname has no leading `*`,
 * otherwise the reassembled URL with the placeholder in place of the `*`.
 */
export function normalizeWildcardHostname(urlInput: string, urlParts: UrlParts | null): string {
  const startsWithWildcard = urlParts == null ? false : urlParts.hostname.startsWith('*')
  if (urlParts == null || !startsWithWildcard) {
    return urlInput
  }

  const { protocolPrefix, hostname, rest } = urlParts
  const hostnameWithoutStar = hostname.slice(1)
  return `${protocolPrefix}${WILDCARD_HOSTNAME_PLACEHOLDER}${hostnameWithoutStar}${rest}`
}
 
/**
 * Computes a numeric score used to order routes: a higher score sorts earlier when
 * routes are sorted by specificity.
 *
 * The score is `hostScore * 26 + pathScore`, where each score is the number of
 * `.`-separated hostname labels / `/`-separated path segments, reduced by 2 when the
 * hostname's first label or the path's last segment is exactly `*`.
 *
 * @param hostname - Raw hostname, possibly starting with a `*` label.
 * @param pathname - Path, possibly ending with a `*` segment.
 * @returns The combined specificity score.
 */
function routeSpecificity(hostname: string, pathname: string): number {
  // Adapted from internal config service routing table implementation
  const hostParts = hostname.split('.')
  let hostScore = hostParts.length
  if (hostParts[0] === '*') {
    // A wildcard label makes the hostname less specific
    hostScore -= 2
  }

  const pathParts = pathname.split('/')
  let pathScore = pathParts.length
  if (pathParts[pathParts.length - 1] === '*') {
    // A trailing wildcard segment makes the path less specific
    pathScore -= 2
  }

  // The magic 26 comes directly from the cloudflare algorithm from workers-sdk
  return hostScore * 26 + pathScore
}
 
/**
 * Builds a URL object from a pattern string, translating any constructor failure into
 * an InvalidPatternError with code `ERR_INVALID_URL`.
 *
 * @param pattern - URL string to parse.
 * @returns The parsed URL.
 * @throws {InvalidPatternError} When URL() rejects the pattern.
 */
function parsePatternUrl(pattern: string): URL {
  let parsed: URL
  try {
    parsed = new URL(pattern)
  } catch {
    throw new InvalidPatternError(`Pattern ${pattern} is not a valid URL`, 'ERR_INVALID_URL')
  }
  return parsed
}
 
/**
 * Parses a list of routes into an array of Route objects that contain detailed route information.
 *
 * Each entry is either a route string or an object whose `url` is the route string and whose
 * `metadata` is copied onto the resulting Route. A route without a `://` protocol separator is
 * parsed as if it were `https://<route>`, but its resulting `protocol` is left undefined.
 *
 * @param allRoutes - Routes to parse. Each route can contain a protocol, a hostname
 * (optionally prefixed with `*`) and a path (optionally ending with `*`).
 * @param options - Optional options. With `sortBySpecificity` set, every route gets a
 * `specificity` score and the result is ordered from most to least specific; ties are broken
 * by the longer route string first.
 * @returns The parsed Route objects, in input order unless `sortBySpecificity` is set.
 *
 * @throws {InvalidProtocolError} If provided URL protocol in one of the routes is not `http:` or `https:`
 * (raised by `validateProtocol`, which is only called for routes with an explicit protocol).
 * @throws {InvalidPatternError} If a route cannot be parsed as a URL, is missing a hostname,
 * contains a query string, or contains an infix wildcard.
 */
export function parseRoutes<Metadata>(
  allRoutes: RouteParam<Metadata>[],
  { sortBySpecificity = false }: ParseRoutesOptions = {}
): Route<Metadata>[] {
  const routes: Route<Metadata>[] = []

  for (const rawRoute of allRoutes) {
    const route = typeof rawRoute === 'string' ? rawRoute : rawRoute.url
    const metadata = typeof rawRoute === 'string' ? undefined : rawRoute.metadata
    const hasProtocol = route.indexOf(PROTOCOL_SEPARATOR) > 0

    let urlInput = route
    // If route is missing a protocol, give it one so it parses
    if (!hasProtocol) {
      urlInput = `https://${urlInput}`
    }
    const urlParts = splitUrlInput(urlInput)
    const rawHostname = urlParts?.hostname ?? ''
    // URL() cannot parse a leading `*` in every environment, so parse with a placeholder label instead
    const urlInputForParse = normalizeWildcardHostname(urlInput, urlParts)
    const url = parsePatternUrl(urlInputForParse)

    if (!urlParts?.hostname) {
      throw new InvalidPatternError(`Route "${route}" is missing a hostname. This is not allowed.`, 'ERR_INVALID_URL')
    }
    // Only an explicitly written protocol is validated and reported; the implied https stays undefined
    let protocol: Protocol | undefined
    if (hasProtocol) {
      validateProtocol(url.protocol)
      protocol = url.protocol
    }

    const anyHostname = rawHostname === '*'
    const specificity = sortBySpecificity ? routeSpecificity(rawHostname, url.pathname) : undefined
    let hostname = url.hostname

    const hasWildcardHostnamePrefix = rawHostname.startsWith('*')
    if (hasWildcardHostnamePrefix && !anyHostname) {
      // Strip the placeholder label that stood in for the leading "*"
      hostname = hostname.substring(WILDCARD_HOSTNAME_PLACEHOLDER.length)
    }

    const hasWildcardPathSuffix = url.pathname.endsWith('*')
    const pathContainsWildcard = url.pathname.includes('*')
    // A "*" is only allowed as the first hostname character and as the last path character
    const hostnameHasInfixWildcard = hasWildcardHostnamePrefix
      ? rawHostname.slice(1).includes('*')
      : rawHostname.includes('*')
    const pathHasInfixWildcard =
      pathContainsWildcard && (!hasWildcardPathSuffix || url.pathname.slice(0, -1).includes('*'))

    if (hasWildcardPathSuffix) {
      // Remove trailing "*"
      url.pathname = url.pathname.substring(0, url.pathname.length - 1)
    }

    if (url.search) {
      throw new InvalidPatternError(
        `Route "${route}" contains a query string. This is not allowed.`,
        'ERR_QUERY_STRING'
      )
    }
    if (hostnameHasInfixWildcard || pathHasInfixWildcard) {
      throw new InvalidPatternError(
        `Route "${route}" contains an infix wildcard. This is not allowed.`,
        'ERR_INFIX_WILDCARD'
      )
    }

    routes.push({
      route,
      metadata,
      specificity,
      protocol,
      wildcardHostnamePrefix: hasWildcardHostnamePrefix,
      hostname: anyHostname ? '' : hostname,
      path: url.pathname,
      wildcardPathSuffix: hasWildcardPathSuffix,
    })
  }

  if (sortBySpecificity) {
    // Sort with the highest specificity first
    routes.sort((a, b) => {
      if (a.specificity === b.specificity) {
        // If routes are equally specific, sort by the longest route first
        return b.route.length - a.route.length
      }
      // Every route has a numeric specificity on this path; the fallback only satisfies the type checker
      return (b.specificity ?? 0) - (a.specificity ?? 0)
    })
  }

  return routes
}