parseUtils.js 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. 'use strict';
  2. var fs = require('fs');
  3. var _ = require('lodash');
  4. var acorn = require('acorn');
  5. var walk = require('acorn/dist/walk');
  6. module.exports = {
  7. parseBundle
  8. };
  /**
   * Reads a bundle file, parses it into an AST and extracts the source text of
   * every module found in the first modules list encountered.
   *
   * @param {string} bundlePath - Path of the bundle file to read (as UTF-8).
   * @returns {{src: string, modules: Object}} `src` is the full file content;
   *   `modules` maps module ids to the slice of `src` covered by that module,
   *   or is an empty object when no modules list was recognised.
   */
  function parseBundle(bundlePath) {
    const content = fs.readFileSync(bundlePath, 'utf8');
    const ast = acorn.parse(content, {
      sourceType: 'script',
      // Set far into the future on purpose so that the newest ECMAScript syntax
      // known to `acorn` is accepted; `acorn` appears to tolerate such a value.
      ecmaVersion: 2050
    });

    const walkState = { locations: null };

    walk.recursive(ast, walkState, {
      CallExpression(node, state, c) {
        // Once a modules list has been located nothing else is inspected.
        if (state.locations) return;

        const callee = node.callee;
        const args = node.arguments;
        let modulesNode = null;

        if (callee.type === 'FunctionExpression' && !callee.id && args.length === 1 && isSimpleModulesList(args[0])) {
          // Main chunk with webpack loader; modules are the single argument:
          // (function (...) {...})(<modules>)
          modulesNode = args[0];
        } else if (callee.type === 'Identifier' && mayBeAsyncChunkArguments(args) && isModulesList(args[1])) {
          // Async Webpack < v4 chunk without webpack loader:
          // webpackJsonp([<chunks>], <modules>, ...)
          // The function name is configurable through `output.jsonpFunction`,
          // so its default name cannot be relied upon.
          modulesNode = args[1];
        } else if (isAsyncChunkPushExpression(node)) {
          // Async Webpack v4 chunk without webpack loader:
          // (window.webpackJsonp=window.webpackJsonp||[]).push([[<chunks>], <modules>, ...]);
          // Again the property name is configurable through `output.jsonpFunction`.
          modulesNode = args[0].elements[1];
        }

        if (modulesNode !== null) {
          state.locations = getModulesLocations(modulesNode);
          return;
        }

        // Descend into the arguments: some plugins (e.g. `DedupePlugin`) and some
        // Webpack features (e.g. `umd` library output) wrap the modules list into
        // an additional IIFE.
        args.forEach(function (arg) {
          c(arg, state);
        });
      }
    });

    const locations = walkState.locations;
    const modules = locations
      ? _.mapValues(locations, function (loc) {
        return content.slice(loc.start, loc.end);
      })
      : {};

    return {
      src: content,
      modules: modules
    };
  }
  /**
   * Tells whether `node` is any of the supported representations of a modules list.
   *
   * @param {Object} node - AST node to inspect.
   * @returns {boolean} `true` for a simple modules list (hash or array) or for the
   *   `Array([minimum ID]).concat([<module>, <module>, ...])` form.
   */
  function isModulesList(node) {
    if (isSimpleModulesList(node)) {
      return true;
    }
    // Modules are contained in expression `Array([minimum ID]).concat([<module>, <module>, ...])`
    return isOptimizedModulesArray(node);
  }
  /**
   * Tells whether `node` is a plain modules container.
   *
   * @param {Object} node - AST node to inspect.
   * @returns {boolean} `true` when `node` is a modules hash (keys are module ids)
   *   or a modules array (indexes are module ids).
   */
  function isSimpleModulesList(node) {
    // Hash form is tried first, array form second.
    return isModulesHash(node) ? true : isModulesArray(node);
  }
  /**
   * Tells whether `node` is an object literal whose every property value looks
   * like a module wrapper.
   *
   * @param {Object} node - AST node to inspect.
   * @returns {boolean} `true` for an `ObjectExpression` whose property values all
   *   satisfy `isModuleWrapper` (an object without properties also qualifies).
   */
  function isModulesHash(node) {
    if (node.type !== 'ObjectExpression') {
      return false;
    }
    const values = node.properties.map(function (property) {
      return property.value;
    });
    return values.every(function (value) {
      return isModuleWrapper(value);
    });
  }
  /**
   * Tells whether `node` is an array literal whose present elements all look
   * like module wrappers.
   *
   * @param {Object} node - AST node to inspect.
   * @returns {boolean} `true` for an `ArrayExpression` in which every non-empty
   *   element satisfies `isModuleWrapper`.
   */
  function isModulesArray(node) {
    if (node.type !== 'ArrayExpression') {
      return false;
    }
    for (const elem of node.elements) {
      // Empty slots are allowed: there may be no module with such id.
      if (elem && !isModuleWrapper(elem)) {
        return false;
      }
    }
    return true;
  }
  /**
   * Tells whether `node` has the shape `Array(<minimum ID>).concat([...modules])`:
   * https://github.com/webpack/webpack/blob/v1.14.0/lib/Template.js#L91
   * In that form `<minimum ID>` + array index gives the module id.
   *
   * @param {Object} node - AST node to inspect.
   * @returns {boolean} `true` when `node` matches the shape described above.
   */
  function isOptimizedModulesArray(node) {
    if (node.type !== 'CallExpression' || node.callee.type !== 'MemberExpression') {
      return false;
    }

    const target = node.callee.object;
    const method = node.callee.property;

    // The object the method is called on must be `Array(<some number>)`.
    const isArrayCall =
      target.type === 'CallExpression' &&
      target.callee.type === 'Identifier' &&
      target.callee.name === 'Array' &&
      target.arguments.length === 1 &&
      isNumericId(target.arguments[0]);
    if (!isArrayCall) {
      return false;
    }

    // The method called on it must be `concat`.
    if (method.type !== 'Identifier' || method.name !== 'concat') {
      return false;
    }

    // Exactly one modules array must be passed to `concat`.
    return node.arguments.length === 1 && isModulesArray(node.arguments[0]);
  }
  /**
   * Tells whether `node` can stand for a module inside a modules list.
   *
   * A missing node is never a module wrapper. Callers may legitimately hand one
   * over: an object spread property has no `value`, and an elided array element
   * is represented by `null`. Answering `false` keeps such input from throwing.
   *
   * @param {?Object} node - AST node to inspect; may be `null`/`undefined`.
   * @returns {boolean} `true` when `node` is an anonymous function or arrow
   *   function, a module id, or an array of shape `[<module_id>, ...args]`.
   */
  function isModuleWrapper(node) {
    if (!node) {
      return false;
    }

    // It's an anonymous function expression that wraps module
    const isFunction = node.type === 'FunctionExpression' || node.type === 'ArrowFunctionExpression';
    if (isFunction && !node.id) {
      return true;
    }

    // If `DedupePlugin` is used it can be an ID of duplicated module...
    if (isModuleId(node)) {
      return true;
    }

    // or an array of shape [<module_id>, ...args]
    if (node.type !== 'ArrayExpression' || node.elements.length <= 1) {
      return false;
    }
    const head = node.elements[0];
    // The first slot may be elided (`[, x]`), in which case it is not a module id.
    return Boolean(head) && isModuleId(head);
  }
  /**
   * Tells whether `node` is a literal usable as a module (or chunk) id.
   *
   * A missing node (e.g. the `null` that stands for an elided array element)
   * is reported as "not an id" instead of raising a TypeError.
   *
   * @param {?Object} node - AST node to inspect; may be `null`/`undefined`.
   * @returns {boolean} `true` for a `Literal` that is either a numeric id
   *   (see `isNumericId`) or a string.
   */
  function isModuleId(node) {
    if (!node || node.type !== 'Literal') {
      return false;
    }
    return isNumericId(node) || typeof node.value === 'string';
  }
  /**
   * Tells whether `node` is a literal holding a non-negative integer.
   *
   * @param {Object} node - AST node to inspect.
   * @returns {boolean} `true` for a `Literal` whose `value` is an integer `>= 0`.
   */
  function isNumericId(node) {
    if (node.type !== 'Literal') {
      return false;
    }
    const value = node.value;
    return Number.isInteger(value) && value >= 0;
  }
  /**
   * Tells whether `node` is an array literal consisting solely of chunk ids.
   * Ids are numeric or string literals; chunk IDs are strings when
   * NamedChunksPlugin is used.
   *
   * Missing input is handled explicitly. `node` itself may be `null` when it was
   * taken from an elided slot of an enclosing array, and the ids array may
   * contain elided slots (`[, 1]`). Both answer `false` instead of throwing.
   *
   * @param {?Object} node - AST node to inspect; may be `null`/`undefined`.
   * @returns {boolean} `true` for an `ArrayExpression` whose every element is a
   *   module id (an array without elements also qualifies).
   */
  function isChunkIds(node) {
    if (!node || node.type !== 'ArrayExpression') {
      return false;
    }
    return node.elements.every(function (elem) {
      // An elided slot is not an id.
      return Boolean(elem) && isModuleId(elem);
    });
  }
  /**
   * Tells whether `node` is a Webpack v4 style async chunk registration:
   * `(window.webpackJsonp=window.webpackJsonp||[]).push([[<chunks>], <modules>, ...])`.
   *
   * The pushed array literal may contain elided slots (`.push([, x])`), which
   * are represented by `null`. Both leading slots are therefore checked before
   * being handed to the helper predicates, which would otherwise dereference
   * `null`.
   *
   * @param {Object} node - `CallExpression` AST node to inspect.
   * @returns {boolean} `true` when the call matches the shape described above.
   */
  function isAsyncChunkPushExpression(node) {
    const callee = node.callee;
    const args = node.arguments;

    if (callee.type !== 'MemberExpression' || callee.property.name !== 'push') {
      return false;
    }

    const target = callee.object;
    if (target.type !== 'AssignmentExpression') {
      return false;
    }

    // The assignment target must be a member of `window`...
    const owner = target.left.object;
    if (!owner) {
      return false;
    }
    // ...or of `this`: Webpack 4 uses `this` instead of `window`
    if (owner.name !== 'window' && owner.type !== 'ThisExpression') {
      return false;
    }

    if (args.length !== 1 || args[0].type !== 'ArrayExpression') {
      return false;
    }

    const payload = args[0].elements;
    // Fewer than two entries, or an elided chunk-ids / modules slot: not a chunk.
    if (!payload[0] || !payload[1]) {
      return false;
    }

    return mayBeAsyncChunkArguments(payload) && isModulesList(payload[1]);
  }
  /**
   * Tells whether an arguments list could belong to an async chunk call:
   * at least two entries, the first of which is a list of chunk ids.
   *
   * @param {Array} args - Argument (or array element) nodes to inspect.
   * @returns {boolean} `true` when `args` has two or more entries and the first
   *   one satisfies `isChunkIds`.
   */
  function mayBeAsyncChunkArguments(args) {
    if (!(args.length >= 2)) {
      return false;
    }
    return isChunkIds(args[0]);
  }
  /**
   * Builds a map from module id to the source location of that module.
   *
   * Supported inputs:
   *  - `ObjectExpression` (modules hash): the id is the property key's `name`,
   *    falling back to its `value`;
   *  - `ArrayExpression` (modules array): the id is the element index;
   *  - `CallExpression` (treated as `Array(<minId>).concat([...])`): the id is
   *    `<minId>` + element index.
   * Any other node type yields an empty map.
   *
   * @param {Object} node - AST node holding the modules.
   * @returns {Object} Map of module id to `{start, end}` as produced by
   *   `getModuleLocation`; empty array slots are skipped.
   */
  function getModulesLocations(node) {
    const locations = {};

    if (node.type === 'ObjectExpression') {
      // Modules hash
      for (const property of node.properties) {
        const moduleId = property.key.name || property.key.value;
        locations[moduleId] = getModuleLocation(property.value);
      }
      return locations;
    }

    const isOptimizedArray = node.type === 'CallExpression';
    if (node.type !== 'ArrayExpression' && !isOptimizedArray) {
      return locations;
    }

    // Modules array or optimized array
    let minId = 0;
    let moduleNodes = node.elements;
    if (isOptimizedArray) {
      // The [minId] value is the literal passed as first argument to `Array()`...
      minId = node.callee.object.arguments[0].value;
      // ...and the modules reside in the array passed to `concat()`.
      moduleNodes = node.arguments[0].elements;
    }

    moduleNodes.forEach(function (moduleNode, index) {
      if (moduleNode) {
        locations[index + minId] = getModuleLocation(moduleNode);
      }
    });
    return locations;
  }
  /**
   * Extracts the source range of an AST node.
   *
   * @param {Object} node - AST node carrying `start` and `end` offsets.
   * @returns {{start: *, end: *}} New object holding only the node's `start`
   *   and `end` values.
   */
  function getModuleLocation(node) {
    const { start, end } = node;
    return { start, end };
  }