static.ts 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. import type { BasicAcceptedElems } from './types.js';
  2. import type { CheerioAPI } from './load.js';
  3. import type { Cheerio } from './cheerio.js';
  4. import type { AnyNode, Document } from 'domhandler';
  5. import { textContent } from 'domutils';
  6. import {
  7. type InternalOptions,
  8. type CheerioOptions,
  9. flattenOptions as flattenOptions,
  10. } from './options.js';
  11. import type { ExtractedMap, ExtractMap } from './api/extract.js';
  /**
   * Serializes part of a document to a string.
   *
   * @param that - Cheerio API instance used to wrap the nodes.
   * @param dom - Nodes to render. When nullish, the children of `that`'s root
   *   are rendered instead.
   * @param options - Options passed along to the wrapping call.
   * @returns The string form of the wrapped nodes, or an empty string when
   *   `that` is falsy.
   */
  function render(
    that: CheerioAPI,
    dom: BasicAcceptedElems<AnyNode> | undefined,
    options: InternalOptions,
  ): string {
    // Without an API instance there is nothing that could be rendered.
    if (!that) {
      return '';
    }

    const target = dom ?? that._root.children;
    const wrapped = that(target, null, undefined, options);
    return wrapped.toString();
  }
  /**
   * Decides whether the first argument is an options object rather than
   * something to render.
   *
   * A value counts as options only when no separate `options` argument was
   * given and the value is a non-null object that has neither a `length` nor a
   * `type` property.
   *
   * @param dom - Value to inspect.
   * @param options - Explicitly passed options; when truthy, `dom` is never
   *   treated as options.
   * @returns Whether `dom` should be treated as an options object.
   */
  function isOptions(
    dom?: BasicAcceptedElems<AnyNode> | CheerioOptions | null,
    options?: CheerioOptions,
  ): dom is CheerioOptions {
    if (options) {
      return false;
    }

    if (typeof dom !== 'object' || dom == null) {
      return false;
    }

    // Array-likes expose `length`, DOM nodes expose `type`.
    return !('length' in dom || 'type' in dom);
  }
  /**
   * Renders the whole document.
   *
   * @category Static
   * @param options - Options for the renderer.
   * @returns The rendered document.
   */
  export function html(this: CheerioAPI, options?: CheerioOptions): string;
  /**
   * Renders the given element(s).
   *
   * @category Static
   * @param dom - Element to render.
   * @param options - Options for the renderer.
   * @returns The rendered document.
   */
  export function html(
    this: CheerioAPI,
    dom?: BasicAcceptedElems<AnyNode>,
    options?: CheerioOptions,
  ): string;
  export function html(
    this: CheerioAPI,
    dom?: BasicAcceptedElems<AnyNode> | CheerioOptions,
    options?: CheerioOptions,
  ): string {
    /*
     * The first argument may actually be the options object (when `html()` is
     * called with options only). Options are told apart from DOM input by the
     * absence of `length` and `type` properties.
     */
    let toRender: BasicAcceptedElems<AnyNode> | undefined;
    if (isOptions(dom)) {
      options = dom;
      toRender = undefined;
    } else {
      toRender = dom;
    }

    /*
     * `$.html()` may be invoked without a loaded document, so the instance
     * options are read defensively and then overlaid with the flattened
     * per-call options.
     */
    const instanceOptions = this?._options;
    const callOptions = flattenOptions(options);
    const opts = { ...instanceOptions, ...callOptions };

    return render(this, toRender, opts);
  }
  /**
   * Renders the document, or the given element(s), as XML.
   *
   * The instance options are reused with `xmlMode` forced to `true`.
   *
   * @category Static
   * @param dom - Element to render.
   * @returns The rendered document.
   */
  export function xml(
    this: CheerioAPI,
    dom?: BasicAcceptedElems<AnyNode>,
  ): string {
    const xmlOptions = { ...this._options, xmlMode: true };

    return render(this, dom, xmlOptions);
  }
  /**
   * Renders the document as text.
   *
   * The result is the concatenated `textContent` of the passed elements, which
   * includes the contents of `<script>` and `<style>` elements. Use
   * `.prop('innerText')` if those should be left out.
   *
   * @category Static
   * @param elements - Elements to render. When omitted, the root of the bound
   *   instance is used, or nothing at all if the function is called unbound.
   * @returns The concatenated text of all elements.
   */
  export function text(
    this: CheerioAPI | void,
    elements?: ArrayLike<AnyNode>,
  ): string {
    const nodes: ArrayLike<AnyNode> = elements ?? (this ? this.root() : []);

    return Array.from(nodes, (node) => textContent(node)).join('');
  }
  /**
   * Parses a string into an array of DOM nodes. The `context` argument has no
   * meaning for Cheerio, but it is kept for API compatibility with jQuery.
   *
   * @category Static
   * @param data - Markup that will be parsed.
   * @param context - Ignored, unless it is a boolean, in which case it is used
   *   as the value of `keepScripts`.
   * @param keepScripts - If false, all scripts will be removed.
   * @returns The parsed DOM, or `null` when `data` is empty or not a string.
   * @alias Cheerio.parseHTML
   * @see {@link https://api.jquery.com/jQuery.parseHTML/}
   */
  export function parseHTML(
    this: CheerioAPI,
    data: string,
    context?: unknown | boolean,
    keepScripts?: boolean,
  ): AnyNode[];
  export function parseHTML(this: CheerioAPI, data?: '' | null): null;
  export function parseHTML(
    this: CheerioAPI,
    data?: string | null,
    context?: unknown | boolean,
    keepScripts = typeof context === 'boolean' ? context : false,
  ): AnyNode[] | null {
    // Only non-empty strings can be parsed.
    if (typeof data !== 'string' || data === '') {
      return null;
    }

    // A boolean `context` always takes precedence over `keepScripts`.
    const scriptsKept = typeof context === 'boolean' ? context : keepScripts;

    const parsed = this.load(data, this._options, false);

    if (!scriptsKept) {
      parsed('script').remove();
    }

    /*
     * Cheerio uses the `children` array internally to group elements sharing
     * a parent. Inserting parsed nodes into an existing DOM removes them from
     * that array, so a shallow copy is returned to keep the result stable.
     */
    return Array.from(parsed.root()[0].children);
  }
  /**
   * Gives access to the top-level root of the document. Use `$.root()` when
   * you need to query or modify the document as a whole.
   *
   * @category Static
   * @example
   *
   * ```js
   * $.root().append('<ul id="vegetables"></ul>').html();
   * //=> <ul id="fruits">...</ul><ul id="vegetables"></ul>
   * ```
   *
   * @returns Cheerio instance wrapping the root node.
   * @alias Cheerio.root
   */
  export function root(this: CheerioAPI): Cheerio<Document> {
    const rootNode = this._root;

    return this(rootNode);
  }
  /**
   * Checks whether the `contained` DOM element is a descendant of the
   * `container` DOM element.
   *
   * @category Static
   * @param container - Potential parent node.
   * @param contained - Potential child node.
   * @returns `true` if `container` is an ancestor of `contained`. A node never
   *   contains itself.
   * @alias Cheerio.contains
   * @see {@link https://api.jquery.com/jQuery.contains/}
   */
  export function contains(container: AnyNode, contained: AnyNode): boolean {
    // Per the jQuery API, an element does not "contain" itself.
    if (container === contained) {
      return false;
    }

    /*
     * Climb the ancestor chain one parent at a time. The walk ends when there
     * is no further node, or when a node is its own parent (root marker).
     */
    for (
      let node: AnyNode | null = contained;
      node && node !== node.parent;

    ) {
      node = node.parent;

      if (node === container) {
        return true;
      }
    }

    return false;
  }
  /**
   * Extracts multiple values from the document and collects them in an object.
   *
   * This delegates to the `extract` method of the document root.
   *
   * @category Static
   * @param map - An object containing key-value pairs. The keys are the names
   *   of the properties to be created on the result, and the values are the
   *   selectors used to extract the values.
   * @returns An object containing the extracted values.
   */
  export function extract<M extends ExtractMap>(
    this: CheerioAPI,
    map: M,
  ): ExtractedMap<M> {
    const documentRoot = this.root();

    return documentRoot.extract(map);
  }
  /** Version of `T` with the `readonly` modifier stripped from every property. */
  type Writable<T> = { -readonly [P in keyof T]: T[P] };
  /**
   * Appends the elements of `arr2` to `arr1`, modifying `arr1` in place.
   *
   * @category Static
   * @param arr1 - First array; receives the elements and an updated `length`.
   * @param arr2 - Second array; left untouched.
   * @returns `arr1`, with elements of `arr2` inserted, or `undefined` if either
   *   argument is not array-like.
   * @alias Cheerio.merge
   * @see {@link https://api.jquery.com/jQuery.merge/}
   */
  export function merge<T>(
    arr1: Writable<ArrayLike<T>>,
    arr2: ArrayLike<T>,
  ): ArrayLike<T> | undefined {
    if (!(isArrayLike(arr1) && isArrayLike(arr2))) {
      return undefined;
    }

    // Both lengths are read once up front, so merging an array into itself
    // copies only the elements that were present at the start.
    let writeAt = arr1.length;
    const count = +arr2.length;

    for (let readAt = 0; readAt < count; readAt++, writeAt++) {
      arr1[writeAt] = arr2[readAt];
    }

    arr1.length = writeAt;

    return arr1;
  }
  /**
   * Checks if an object is array-like.
   *
   * Real arrays always qualify. Any other value must be a non-null object
   * whose `length` is a non-negative integer and which has a property for
   * every index from `0` up to `length - 1`.
   *
   * @category Static
   * @param item - Item to check.
   * @returns Indicates if the item is array-like.
   */
  function isArrayLike(item: unknown): item is ArrayLike<unknown> {
    if (Array.isArray(item)) {
      return true;
    }

    if (
      typeof item !== 'object' ||
      item === null ||
      !('length' in item) ||
      typeof item.length !== 'number' ||
      /*
       * `NaN` and fractional lengths are numbers that pass a plain `< 0`
       * comparison, yet they cannot describe a valid index range. Accepting
       * them would let callers write to bogus keys such as `"NaN"`.
       */
      !Number.isInteger(item.length) ||
      item.length < 0
    ) {
      return false;
    }

    // Every index below `length` has to exist on the object.
    for (let i = 0; i < item.length; i++) {
      if (!(i in item)) {
        return false;
      }
    }

    return true;
  }