parseUtils.js 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. const fs = require('fs');
  2. const _ = require('lodash');
  3. const acorn = require('acorn');
  4. const walk = require('acorn/dist/walk');
  5. module.exports = {
  6. parseBundle
  7. };
  /**
   * Reads a webpack bundle from disk and extracts the source text of every module found in it.
   *
   * The bundle is parsed with `acorn` and its call expressions are inspected until one of the
   * known "modules list" shapes is recognised; the search stops at the first match.
   *
   * @param {string} bundlePath Path of the bundle file; it is read synchronously as UTF-8.
   * @returns {{src: string, modules: Object}} `src` is the full file content and `modules` maps
   *   each module id to the slice of `src` spanned by that module's AST node. `modules` is an
   *   empty object when no modules list was recognised.
   */
  function parseBundle(bundlePath) {
    const content = fs.readFileSync(bundlePath, 'utf8');
    const ast = acorn.parse(content, {
      sourceType: 'script',
      // Set to `2050` so that the latest ECMAScript version that currently exists is supported.
      // `acorn` seems to accept such an unusual option value.
      ecmaVersion: 2050
    });
    const walkState = {locations: null};

    const visitCall = (node, state, c) => {
      // A modules list has already been found: nothing more to do.
      if (state.locations) return;

      const {callee} = node;
      const args = node.arguments;
      let modulesNode = null;

      if (
        callee.type === 'FunctionExpression' &&
        !callee.id &&
        args.length === 1 &&
        isSimpleModulesList(args[0])
      ) {
        // Main chunk with webpack loader. Modules are stored in the first argument:
        // (function (...) {...})(<modules>)
        modulesNode = args[0];
      } else if (
        callee.type === 'Identifier' &&
        mayBeAsyncChunkArguments(args) &&
        isModulesList(args[1])
      ) {
        // Async Webpack < v4 chunk without webpack loader:
        // webpackJsonp([<chunks>], <modules>, ...)
        // The function name may be changed with the `output.jsonpFunction` option,
        // so we can't rely on its default name.
        modulesNode = args[1];
      } else if (isAsyncChunkPushExpression(node)) {
        // Async Webpack v4 chunk without webpack loader:
        // (window.webpackJsonp=window.webpackJsonp||[]).push([[<chunks>], <modules>, ...]);
        // The function name may be changed with the `output.jsonpFunction` option,
        // so we can't rely on its default name.
        modulesNode = args[0].elements[1];
      }

      if (modulesNode) {
        state.locations = getModulesLocations(modulesNode);
        return;
      }

      // Walk into the arguments because some plugins (e.g. `DedupePlugin`) or some Webpack
      // features (e.g. `umd` library output) can wrap the modules list into an additional IIFE.
      for (const arg of args) {
        c(arg, state);
      }
    };

    walk.recursive(ast, walkState, {CallExpression: visitCall});

    const modules = walkState.locations
      ? _.mapValues(walkState.locations, ({start, end}) => content.slice(start, end))
      : {};

    return {
      src: content,
      modules
    };
  }
  /**
   * Tells whether an AST node is one of the recognised containers of webpack modules.
   *
   * @param {Object} node AST node to inspect.
   * @returns {boolean} Result of the simple hash/array check, or, failing that, of the
   *   `Array([minimum ID]).concat([<module>, <module>, ...])` check.
   */
  function isModulesList(node) {
    const isSimple = isSimpleModulesList(node);
    // The optimized form is only tested when the simple forms did not match.
    return isSimple || isOptimizedModulesArray(node);
  }
  /**
   * Tells whether an AST node is a plain modules container.
   *
   * @param {Object} node AST node to inspect.
   * @returns {boolean} Truthy for a hash whose keys are module ids, or for an array whose
   *   indexes are module ids.
   */
  function isSimpleModulesList(node) {
    const isHash = isModulesHash(node);
    // The array form is only tested when the hash form did not match.
    return isHash || isModulesArray(node);
  }
  /**
   * Tells whether an AST node is an object literal whose every property value is a module wrapper.
   *
   * Entries without a `value` node (e.g. a spread element such as `{...other}`) are not module
   * definitions: they make the check fail instead of being handed to `isModuleWrapper`, which
   * would throw on a missing node.
   *
   * @param {Object} node AST node to inspect.
   * @returns {boolean} `true` when `node` is an `ObjectExpression` and all of its properties
   *   hold module wrappers (an object without properties also qualifies).
   */
  function isModulesHash(node) {
    if (node.type !== 'ObjectExpression') return false;

    return node.properties.every(
      property => property.value != null && isModuleWrapper(property.value)
    );
  }
  /**
   * Tells whether an AST node is an array literal whose present items are all module wrappers.
   *
   * @param {Object} node AST node to inspect.
   * @returns {boolean} `true` when `node` is an `ArrayExpression` and each non-empty element
   *   is a module wrapper.
   */
  function isModulesArray(node) {
    if (node.type !== 'ArrayExpression') return false;

    // Some array items may be skipped because there is no module with such an id,
    // so empty slots are accepted as-is.
    return _.every(node.elements, elem => (elem ? isModuleWrapper(elem) : true));
  }
  /**
   * Tells whether modules are contained in an `Array(<minimum ID>).concat(...modules)` call:
   * https://github.com/webpack/webpack/blob/v1.14.0/lib/Template.js#L91
   * In that form `<minimum ID>` + array index gives the module id.
   *
   * @param {Object} node AST node to inspect.
   * @returns {boolean} `true` when `node` has exactly that shape.
   */
  function isOptimizedModulesArray(node) {
    if (node.type !== 'CallExpression') return false;

    const {callee, arguments: concatArgs} = node;
    if (callee.type !== 'MemberExpression') return false;

    const {object: receiver, property: method} = callee;

    // The object the method is called on has to be `Array(<some number>)`.
    const isArrayCall = (
      receiver.type === 'CallExpression' &&
      receiver.callee.type === 'Identifier' &&
      receiver.callee.name === 'Array' &&
      receiver.arguments.length === 1 &&
      isNumericId(receiver.arguments[0])
    );
    if (!isArrayCall) return false;

    // The method called on `Array(<some number>)` has to be `concat`.
    const isConcat = method.type === 'Identifier' && method.name === 'concat';
    if (!isConcat) return false;

    // Exactly one array must be passed in to `concat`.
    return concatArgs.length === 1 && isModulesArray(concatArgs[0]);
  }
  /**
   * Tells whether an AST node can stand for a module inside a modules list.
   *
   * Accepted shapes:
   *  - an anonymous function expression or arrow function that wraps the module;
   *  - a module id (if `DedupePlugin` is used it can be the id of a duplicated module);
   *  - an array of shape `[<module_id>, ...args]` with more than one element.
   *
   * @param {Object} node AST node to inspect (must not be `null`).
   * @returns {boolean} `true` when `node` matches one of the shapes above.
   */
  function isModuleWrapper(node) {
    const isFunction = (
      node.type === 'FunctionExpression' ||
      node.type === 'ArrowFunctionExpression'
    );
    if (isFunction && !node.id) return true;

    if (isModuleId(node)) return true;

    if (node.type !== 'ArrayExpression' || node.elements.length <= 1) return false;

    // An array hole (`[, x]`) is represented by `null`; it cannot be a module id and must not
    // be passed on, because `isModuleId` dereferences its argument.
    const [first] = node.elements;
    return first != null && isModuleId(first);
  }
  /**
   * Tells whether an AST node is a literal usable as a module id.
   *
   * @param {Object} node AST node to inspect.
   * @returns {boolean} `true` for a `Literal` holding either a string or a numeric id.
   */
  function isModuleId(node) {
    if (node.type !== 'Literal') return false;

    return typeof node.value === 'string' || isNumericId(node);
  }
  /**
   * Tells whether an AST node is a literal holding a non-negative integer.
   *
   * @param {Object} node AST node to inspect.
   * @returns {boolean} `true` for a `Literal` whose value is an integer greater than or equal to 0.
   */
  function isNumericId(node) {
    if (node.type !== 'Literal') return false;

    const {value} = node;
    return Number.isInteger(value) && value >= 0;
  }
  /**
   * Tells whether an AST node is an array of chunk ids.
   *
   * Chunk ids are numeric or string literals (they are strings when `NamedChunksPlugin` is used).
   * An array hole (`[, 1]`), which the AST represents with `null`, is not a chunk id: it makes
   * the check fail instead of being handed to `isModuleId`, which dereferences its argument.
   *
   * @param {Object} node AST node to inspect.
   * @returns {boolean} `true` when `node` is an `ArrayExpression` whose elements are all ids
   *   (an empty array also qualifies).
   */
  function isChunkIds(node) {
    if (node.type !== 'ArrayExpression') return false;

    return node.elements.every(elem => elem != null && isModuleId(elem));
  }
  /**
   * Tells whether a call expression is a Webpack v4 async chunk registration:
   * `(<global>.webpackJsonp = <global>.webpackJsonp || []).push([[<chunks>], <modules>, ...])`.
   *
   * The object the jsonp array lives on is not restricted to `window` or `this`: Webpack's
   * `output.globalObject` option allows any expression there (e.g. `self`, `globalThis` or a
   * conditional expression), so only the structure of the call is checked. The property name
   * is not checked either, as it may be changed with the `output.jsonpFunction` option.
   *
   * @param {Object} node `CallExpression` AST node to inspect.
   * @returns {boolean} `true` when the call has the shape described above; in that case
   *   `node.arguments[0].elements[1]` is the modules list.
   */
  function isAsyncChunkPushExpression(node) {
    const {callee, arguments: args} = node;

    if (callee.type !== 'MemberExpression' || callee.property.name !== 'push') return false;

    // `.push` has to be called on an assignment whose target is a property of some object.
    const assignment = callee.object;
    if (
      assignment.type !== 'AssignmentExpression' ||
      assignment.left.type !== 'MemberExpression'
    ) {
      return false;
    }

    if (args.length !== 1 || args[0].type !== 'ArrayExpression') return false;

    const {elements} = args[0];
    return (
      mayBeAsyncChunkArguments(elements) &&
      // An array hole is `null` in the AST and can't be a modules list.
      elements[1] != null &&
      isModulesList(elements[1])
    );
  }
  /**
   * Tells whether a list of AST nodes may be the arguments of an async chunk registration,
   * i.e. it has at least two entries and the first one is an array of chunk ids.
   *
   * The list may come from the elements of an array literal, where a hole (`[, x]`) is
   * represented by `null`; such an entry is rejected instead of being handed to `isChunkIds`,
   * which dereferences its argument.
   *
   * @param {Array<Object|null>} args AST nodes of the call arguments or array elements.
   * @returns {boolean} `true` when the list may describe an async chunk.
   */
  function mayBeAsyncChunkArguments(args) {
    if (args.length < 2) return false;

    const [chunkIds] = args;
    return chunkIds != null && isChunkIds(chunkIds);
  }
  /**
   * Builds a map from module id to the source location of each module in a modules list.
   *
   * @param {Object} node Modules list AST node: an object literal (keys are module ids), an array
   *   literal (indexes are module ids) or a call expression, which is read as
   *   `Array(<minimum ID>).concat([...modules])` (`<minimum ID>` + index is the module id).
   * @returns {Object} Map of module id to `{start, end}`; empty for any other node type.
   */
  function getModulesLocations(node) {
    const locations = {};

    if (node.type === 'ObjectExpression') {
      // Modules hash
      for (const property of node.properties) {
        const {key} = property;
        locations[key.name || key.value] = getModuleLocation(property.value);
      }
      return locations;
    }

    const isOptimizedArray = node.type === 'CallExpression';
    if (!isOptimizedArray && node.type !== 'ArrayExpression') {
      return locations;
    }

    // Modules array or optimized array.
    // Simple array: ids start at `0` and the modules are the array's own elements.
    let minId = 0;
    let modulesNodes = node.elements;
    if (isOptimizedArray) {
      // The minimum id is the literal passed to `Array()`, and the modules reside in
      // the array passed to `concat()`.
      minId = node.callee.object.arguments[0].value;
      modulesNodes = node.arguments[0].elements;
    }

    modulesNodes.forEach((moduleNode, index) => {
      // Skipped array items have no module.
      if (moduleNode) {
        locations[index + minId] = getModuleLocation(moduleNode);
      }
    });

    return locations;
  }
  /**
   * Extracts the source range of an AST node.
   *
   * @param {Object} node AST node carrying `start` and `end` properties.
   * @returns {{start: *, end: *}} New object holding only the node's `start` and `end` values.
   */
  function getModuleLocation(node) {
    const {start, end} = node;
    return {start, end};
  }