verify.js 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. 'use strict'
  2. const BB = require('bluebird')
  3. const contentPath = require('./content/path')
  4. const finished = BB.promisify(require('mississippi').finished)
  5. const fixOwner = require('./util/fix-owner')
  6. const fs = require('graceful-fs')
  7. const glob = BB.promisify(require('glob'))
  8. const index = require('./entry-index')
  9. const path = require('path')
  10. const rimraf = BB.promisify(require('rimraf'))
  11. const ssri = require('ssri')
  12. BB.promisifyAll(fs)
  13. module.exports = verify
  // Runs every verification step against `cache`, one after another, and
  // resolves to a single stats object.
  //
  // `opts` is optional; the only field read directly here is `opts.log`
  // (its `silly` method). The same `opts` object is handed to every step.
  //
  // Whatever object a step resolves to is shallow-copied onto the running
  // stats. The duration of each step, in milliseconds, is stored under
  // `stats.runTime[<step function name>]`, and `stats.runTime.total` is set to
  // `stats.endTime - stats.startTime` once all steps are done.
  function verify (cache, opts) {
    opts = opts || {}
    if (opts.log) {
      opts.log.silly('verify', 'verifying cache at', cache)
    }

    const steps = [
      markStartTime,
      fixPerms,
      garbageCollect,
      rebuildIndex,
      cleanTmp,
      writeVerifile,
      markEndTime
    ]

    // Reducer: run one step, fold its result into `stats`, time it.
    const runStep = (stats, step, i) => {
      const label = step.name || `step #${i}`
      const startedAt = new Date()
      return BB.resolve(step(cache, opts)).then(result => {
        if (result) {
          const keys = Object.keys(result)
          for (let n = 0; n < keys.length; n++) {
            stats[keys[n]] = result[keys[n]]
          }
        }
        const finishedAt = new Date()
        if (!stats.runTime) {
          stats.runTime = {}
        }
        stats.runTime[label] = finishedAt - startedAt
        return stats
      })
    }

    return BB.reduce(steps, runStep, {}).tap(stats => {
      const total = stats.endTime - stats.startTime
      stats.runTime.total = total
      if (opts.log) {
        opts.log.silly('verify', 'verification finished for', cache, 'in', `${total}ms`)
      }
    })
  }
  // Verification step: captures the current time so `verify` can compute the
  // total run time. Both arguments are accepted for step-signature
  // uniformity and are not used.
  function markStartTime (cache, opts) {
    const startTime = new Date()
    return { startTime: startTime }
  }
  // Verification step: captures the current time at the end of the run so
  // `verify` can compute the total run time. Both arguments are accepted for
  // step-signature uniformity and are not used.
  function markEndTime (cache, opts) {
    const endTime = new Date()
    return { endTime: endTime }
  }
  // Verification step: calls `fixOwner.mkdirfix` and then `fixOwner.chownr` on
  // the cache root with `opts.uid` / `opts.gid`. Always resolves to `null`, so
  // this step contributes nothing to the stats object.
  function fixPerms (cache, opts) {
    if (opts.log) {
      opts.log.silly('verify', 'fixing cache permissions')
    }
    const ensureRoot = fixOwner.mkdirfix(cache, opts.uid, opts.gid)
    // TODO - fix file permissions too
    const chownTree = () => fixOwner.chownr(cache, opts.uid, opts.gid)
    return ensureRoot.then(chownTree).then(() => null)
  }
  // Mark-and-sweep garbage collection for cache content.
  //
  // Mark:  stream every index entry and record the string form of its
  //        integrity value as "live". When `opts.filter` is provided, entries
  //        for which it returns a falsy value are not marked.
  // Sweep: glob every file (directories excluded, symlinks not followed)
  //        under the content directory. For each file the hex digest is
  //        rebuilt from its last three path segments and the algorithm is
  //        read from the segment just before them.
  //          - Live content goes through `verifyContent`, which deletes it
  //            when its checksum fails.
  //          - Content that no live entry points at is rimraf'd.
  //
  // Resolves to the counters `verifiedContent`, `reclaimedCount`,
  // `reclaimedSize`, `badContentCount` and `keptSize`. At most
  // `opts.concurrency` (default 20) files are processed at once.
  function garbageCollect (cache, opts) {
    if (opts.log) {
      opts.log.silly('verify', 'garbage collecting content')
    }

    // --- mark ---
    const indexStream = index.lsStream(cache)
    const liveContent = new Set()
    indexStream.on('data', entry => {
      const wanted = !(opts && opts.filter) || opts.filter(entry)
      if (wanted) {
        liveContent.add(entry.integrity.toString())
      }
    })

    // --- sweep ---
    return finished(indexStream).then(() => {
      const pattern = path.join(contentPath._contentDir(cache), '**')
      return glob(pattern, {
        follow: false,
        nodir: true,
        nosort: true
      })
    }).then(files => {
      const stats = {
        verifiedContent: 0,
        reclaimedCount: 0,
        reclaimedSize: 0,
        badContentCount: 0,
        keptSize: 0
      }

      const sweep = file => {
        const parts = file.split(/[/\\]/)
        const algo = parts[parts.length - 4]
        const digest = parts.slice(parts.length - 3).join('')
        const integrity = ssri.fromHex(digest, algo)

        if (!liveContent.has(integrity.toString())) {
          // No entries refer to this content. We can delete.
          stats.reclaimedCount++
          return fs.statAsync(file).then(s => {
            return rimraf(file).then(() => {
              stats.reclaimedSize += s.size
              return stats
            })
          })
        }

        return verifyContent(file, integrity).then(info => {
          if (info.valid) {
            stats.verifiedContent++
            stats.keptSize += info.size
          } else {
            stats.reclaimedCount++
            stats.badContentCount++
            stats.reclaimedSize += info.size
          }
          return stats
        })
      }

      return BB.map(files, sweep, {
        concurrency: opts.concurrency || 20
      }).then(() => stats)
    })
  }
  // Checks a single content file against the integrity value `sri`.
  //
  // Resolves to `{size, valid}`:
  //   - `size` is the file's `stat.size`;
  //   - `valid` stays `true` unless `ssri.checkStream` rejects with code
  //     'EINTEGRITY', in which case the file is rimraf'd and `valid` becomes
  //     `false`. Any other error from the check is rethrown.
  // An ENOENT rejection anywhere in the chain resolves to
  // `{size: 0, valid: false}` instead.
  function verifyContent (filepath, sri) {
    const missing = () => ({size: 0, valid: false})

    return fs.statAsync(filepath).then(stat => {
      const contentInfo = {
        size: stat.size,
        valid: true
      }
      const contentStream = fs.createReadStream(filepath)
      return ssri.checkStream(contentStream, sri).catch(err => {
        if (err.code === 'EINTEGRITY') {
          return rimraf(filepath).then(() => {
            contentInfo.valid = false
          })
        }
        throw err
      }).then(() => contentInfo)
    }).catch({code: 'ENOENT'}, missing)
  }
  // Verification step: regroups every index entry by its hashed key and has
  // `rebuildBucket` rewrite each resulting bucket.
  //
  // Entries for which `opts.filter` returns a falsy value are counted in
  // `rejectedEntries` and left out of their bucket. A bucket is still created
  // for them (possibly empty) and handed to `rebuildBucket` like the others.
  // Each bucket array carries its on-disk location in a `_path` property,
  // which `rebuildBucket` reads.
  //
  // Resolves to `{missingContent, rejectedEntries, totalEntries}`. At most
  // `opts.concurrency` (default 20) buckets are rebuilt at once.
  function rebuildIndex (cache, opts) {
    opts.log && opts.log.silly('verify', 'rebuilding index')
    return index.ls(cache).then(entries => {
      const stats = {
        missingContent: 0,
        rejectedEntries: 0,
        totalEntries: 0
      }
      const buckets = {}
      // `entries` is keyed by cache key, so a key such as "hasOwnProperty"
      // would shadow the method of the same name. Object.keys already yields
      // own enumerable keys only and never calls a method on `entries`.
      Object.keys(entries).forEach(k => {
        const hashed = index._hashKey(k)
        const entry = entries[k]
        const excluded = opts && opts.filter && !opts.filter(entry)
        if (excluded) {
          stats.rejectedEntries++
        }
        if (!buckets[hashed]) {
          buckets[hashed] = []
          buckets[hashed]._path = index._bucketPath(cache, k)
        }
        if (!excluded) {
          buckets[hashed].push(entry)
        }
      })
      return BB.map(Object.keys(buckets), key => {
        return rebuildBucket(cache, buckets[key], stats, opts)
      }, {concurrency: opts.concurrency || 20}).then(() => stats)
    })
  }
  // Rewrites one index bucket: truncates the file at `bucket._path`, then
  // re-inserts each entry of `bucket` whose content file can be stat'ed.
  //
  // Mutates `stats`: `totalEntries` is bumped for every re-inserted entry;
  // an ENOENT rejection from the stat/insert chain bumps both
  // `rejectedEntries` and `missingContent` instead.
  function rebuildBucket (cache, bucket, stats, opts) {
    const reinsert = entry => {
      const content = contentPath(cache, entry.integrity)
      return fs.statAsync(content).then(() => {
        const insertOpts = {
          uid: opts.uid,
          gid: opts.gid,
          metadata: entry.metadata
        }
        return index.insert(cache, entry.key, entry.integrity, insertOpts)
      }).then(() => {
        stats.totalEntries++
      }).catch({code: 'ENOENT'}, () => {
        stats.rejectedEntries++
        stats.missingContent++
      })
    }

    return fs.truncateAsync(bucket._path).then(() => {
      // This needs to be serialized because cacache explicitly
      // lets very racy bucket conflicts clobber each other.
      return BB.mapSeries(bucket, reinsert)
    })
  }
  // Verification step: rimrafs the `tmp` directory directly under the cache
  // root and returns rimraf's promise.
  function cleanTmp (cache, opts) {
    if (opts.log) {
      opts.log.silly('verify', 'cleaning tmp directory')
    }
    const tmpDir = path.join(cache, 'tmp')
    return rimraf(tmpDir)
  }
  // Verification step: writes the current time, as a string of epoch
  // milliseconds, to `<cache>/_lastverified`. `lastRun` reads this file back.
  function writeVerifile (cache, opts) {
    const verifile = path.join(cache, '_lastverified')
    if (opts.log) {
      opts.log.silly('verify', 'writing verifile to ' + verifile)
    }
    const stamp = String(new Date().getTime())
    return fs.writeFileAsync(verifile, stamp)
  }
  194. module.exports.lastRun = lastRun
  // Reads `<cache>/_lastverified` as UTF-8 and resolves to a Date built from
  // its numeric content (the file is written by `writeVerifile` as epoch
  // milliseconds). Rejects with whatever `fs.readFileAsync` rejects with,
  // e.g. when the file does not exist.
  function lastRun (cache) {
    const verifile = path.join(cache, '_lastverified')
    return fs.readFileAsync(verifile, 'utf8').then(data => {
      const millis = Number(data)
      return new Date(millis)
    })
  }