MongoDB

Interview Queries 2

20 Queries

Problem: The employees collection has a managerId field. Find all direct and indirect reports of a given manager (full subtree traversal).
// employees collection structure:
// { _id: ObjectId, name: "Alice", managerId: ObjectId | null, level: 1 }

// Find all direct and indirect reports under a given manager
db.employees.aggregate([
  { $match: { _id: ObjectId("64a1b2c3d4e5f6789abcdef0") } },  // start from manager
  {
    $graphLookup: {
      from: "employees",
      startWith: "$_id",
      connectFromField: "_id",
      connectToField: "managerId",
      as: "allReports",
      maxDepth: 10,               // limit traversal depth
      depthField: "reportLevel",  // adds depth info to each result
      restrictSearchWithMatch: { isActive: true }  // only active employees
    }
  },
  {
    $project: {
      _id: 0,
      manager: "$name",
      totalReports: { $size: "$allReports" },
      reportTree: {
        $sortArray: { input: "$allReports", sortBy: { reportLevel: 1 } }  // $sortArray: MongoDB 5.2+
      }
    }
  }
])

// Find ancestors (chain of managers) for a given employee
db.employees.aggregate([
  { $match: { _id: ObjectId("64a1b2c3d4e5f6789abcdef9") } },
  {
    $graphLookup: {
      from: "employees",
      startWith: "$managerId",
      connectFromField: "managerId",
      connectToField: "_id",
      as: "managementChain",
      depthField: "level"
    }
  }
])
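
The chain comes back unordered; a hedged follow-up $project can sort it by the depth field so it reads from direct manager up to the top:

{
  $project: {
    name: 1,
    managementChain: {
      $sortArray: { input: "$managementChain", sortBy: { level: 1 } }
    }
  }
}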

Problem: Build a product search results page that simultaneously returns: (1) paginated results, (2) category count distribution, (3) price range buckets, and (4) total match count — all in one MongoDB query.
db.products.aggregate([
  { $match: { isActive: true, name: { $regex: "laptop", $options: "i" } } },
  {
    $facet: {
      // Paginated results
      results: [
        { $sort: { rating: -1, price: 1 } },
        { $skip: 0 },   // (page - 1) * pageSize
        { $limit: 20 },
        { $project: { name: 1, price: 1, category: 1, rating: 1 } }
      ],
      // Category distribution
      categories: [
        { $group: { _id: "$category", count: { $sum: 1 } } },
        { $sort: { count: -1 } }
      ],
      // Price buckets
      priceBuckets: [
        {
          $bucket: {
            groupBy: "$price",
            boundaries: [0, 500, 1000, 2500, 5000, 10000],
            default: "10000+",
            output: { count: { $sum: 1 }, avgPrice: { $avg: "$price" } }
          }
        }
      ],
      // Total count
      totalCount: [
        { $count: "count" }
      ],
      // Brand distribution
      brands: [
        { $group: { _id: "$brand", count: { $sum: 1 } } },
        { $sort: { count: -1 } },
        { $limit: 10 }
      ]
    }
  }
])
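
$count inside a facet yields an array like [{ count: 123 }]; a hedged follow-up stage flattens it to a scalar, defaulting to 0 when nothing matched:

{ $addFields: { totalCount: { $ifNull: [{ $arrayElemAt: ["$totalCount.count", 0] }, 0] } } }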

Problem: Every night, compute a daily analytics summary (total orders, revenue, new users) and store the results in a dailyStats collection. Use $merge to upsert without deleting previous data.
db.orders.aggregate([
  {
    $match: {
      createdAt: {
        $gte: new Date("2024-01-15"),
        $lt:  new Date("2024-01-16")
      }
    }
  },
  {
    $group: {
      _id: { $dateToString: { format: "%Y-%m-%d", date: "$createdAt" } },
      totalOrders: { $sum: 1 },
      totalRevenue: { $sum: "$amount" },
      avgOrderValue: { $avg: "$amount" },
      completedOrders: {
        $sum: { $cond: [{ $eq: ["$status", "completed"] }, 1, 0] }
      }
    }
  },
  {
    $merge: {
      into: "dailyStats",           // target collection
      on: "_id",                    // merge key (match by date)
      whenMatched: "merge",         // merge fields if document exists
      whenNotMatched: "insert"      // insert if no match
    }
  }
])

// Can be scheduled as a cron job — safe to run multiple times
// $merge options:
// whenMatched:    "replace" | "merge" | "keepExisting" | "fail" | [pipeline]
// whenNotMatched: "insert" | "discard" | "fail"
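
When whenMatched is a pipeline, the incoming document is exposed as $$new, so old and new values can be combined instead of overwritten. A hedged sketch that accumulates running totals (note: unlike plain "merge", this variant double-counts if re-run for the same day):

{
  $merge: {
    into: "dailyStats",
    on: "_id",
    whenMatched: [
      { $set: {
          totalOrders:  { $add: ["$totalOrders", "$$new.totalOrders"] },
          totalRevenue: { $add: ["$totalRevenue", "$$new.totalRevenue"] }
      } }
    ],
    whenNotMatched: "insert"
  }
}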

Problem: Users have a location field (GeoJSON Point). Find all orders placed by users within 10 km of a given store location, including distance from the store.
// Users collection has: { location: { type: "Point", coordinates: [lng, lat] } }
// Ensure 2dsphere index exists
db.users.createIndex({ location: "2dsphere" });

// Step 1: Find nearby users with distance
const storeCoords = [-73.9857, 40.7484];  // [lng, lat] of the store

const nearbyUsers = await db.collection('users').aggregate([
  {
    $geoNear: {
      near: { type: "Point", coordinates: storeCoords },
      distanceField: "distanceMeters",
      maxDistance: 10000,        // 10 km in meters
      spherical: true,
      query: { isActive: true }  // optional pre-filter
    }
  },
  {
    $project: {
      _id: 1,
      name: 1,
      distanceKm: { $divide: ["$distanceMeters", 1000] }
    }
  }
]).toArray();

// Step 2: Fetch orders for those users
const ids = nearbyUsers.map(u => u._id);
const orders = await db.collection('orders').find({
  customerId: { $in: ids },
  status: "completed"
}).sort({ createdAt: -1 }).toArray();

// All-in-one pipeline with $lookup
db.users.aggregate([
  { $geoNear: { near: { type: "Point", coordinates: storeCoords }, distanceField: "dist", maxDistance: 10000, spherical: true } },
  { $lookup: { from: "orders", localField: "_id", foreignField: "customerId", as: "orders" } },
  { $match: { "orders.0": { $exists: true } } }
])
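
The $lookup join is only efficient if the foreign key is indexed, so the orders collection needs:

db.orders.createIndex({ customerId: 1 });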

Problem: Build a flexible time-series report for orders showing weekly, monthly, and quarterly totals simultaneously using a single aggregation pipeline.
db.orders.aggregate([
  { $match: { status: "completed", createdAt: { $gte: new Date("2024-01-01") } } },
  {
    $addFields: {
      week:    { $week: "$createdAt" },
      month:   { $month: "$createdAt" },
      quarter: {
        $ceil: { $divide: [{ $month: "$createdAt" }, 3] }
      },
      year: { $year: "$createdAt" }
    }
  },
  {
    $facet: {
      byWeek: [
        { $group: { _id: { year: "$year", week: "$week" }, revenue: { $sum: "$amount" }, count: { $sum: 1 } } },
        { $sort: { "_id.year": 1, "_id.week": 1 } }
      ],
      byMonth: [
        { $group: { _id: { year: "$year", month: "$month" }, revenue: { $sum: "$amount" }, count: { $sum: 1 } } },
        { $sort: { "_id.year": 1, "_id.month": 1 } }
      ],
      byQuarter: [
        { $group: { _id: { year: "$year", quarter: "$quarter" }, revenue: { $sum: "$amount" }, count: { $sum: 1 } } },
        { $sort: { "_id.year": 1, "_id.quarter": 1 } }
      ]
    }
  }
])

// For date truncation (MongoDB 5.0+) — cleaner approach:
{ $dateTrunc: { date: "$createdAt", unit: "week" } }
{ $dateTrunc: { date: "$createdAt", unit: "month" } }
{ $dateTrunc: { date: "$createdAt", unit: "quarter" } }

Problem: Generate a data quality report: find all users missing any required profile fields (phone, address, dateOfBirth), and count how many users are missing each field.
// Find users with any missing required field
// (note: { field: null } matches documents where the field is null OR missing)
db.users.find({
  $or: [
    { phone: { $exists: false } },
    { phone: null },
    { "address.street": { $exists: false } },
    { "address.city": null },
    { dateOfBirth: { $exists: false } },
    { profileComplete: false }  // explicit completeness flag, if maintained
  ]
}, { name: 1, email: 1, phone: 1, address: 1, dateOfBirth: 1 })

// Count missing fields per field name (data quality dashboard)
db.users.aggregate([
  {
    $facet: {
      missingPhone: [
        { $match: { $or: [{ phone: { $exists: false } }, { phone: null }] } },
        { $count: "count" }
      ],
      missingAddress: [
        { $match: { $or: [{ address: { $exists: false } }, { address: null }] } },
        { $count: "count" }
      ],
      missingDOB: [
        { $match: { $or: [{ dateOfBirth: { $exists: false } }, { dateOfBirth: null }] } },
        { $count: "count" }
      ],
      totalUsers: [ { $count: "count" } ]
    }
  }
])

// Remove null fields from all documents
db.users.updateMany(
  { phone: null },
  { $unset: { phone: "" } }
)
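
The $facet above repeats the same pattern per field; a hedged sketch that builds the stage dynamically from a field list, relying on { field: null } matching both null and missing:

const requiredFields = ['phone', 'address', 'dateOfBirth'];

const facets = Object.fromEntries(
  requiredFields.map(f => [
    `missing_${f}`,
    [{ $match: { [f]: null } }, { $count: 'count' }]
  ])
);
facets.totalUsers = [{ $count: 'count' }];

const report = await db.collection('users').aggregate([{ $facet: facets }]).toArray();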

Problem: Calculate Customer Lifetime Value: total revenue per customer, average order frequency, and customer tenure in days. Segment customers into High/Medium/Low CLV tiers.
db.orders.aggregate([
  { $match: { status: "completed" } },
  {
    $group: {
      _id: "$customerId",
      totalRevenue: { $sum: "$amount" },
      orderCount: { $sum: 1 },
      avgOrderValue: { $avg: "$amount" },
      firstOrder: { $min: "$createdAt" },
      lastOrder: { $max: "$createdAt" }
    }
  },
  {
    $addFields: {
      tenureDays: {
        $divide: [{ $subtract: ["$$NOW", "$firstOrder"] }, 86400000]
      },
      avgMonthlyRevenue: {
        $divide: [
          "$totalRevenue",
          { $add: [{ $divide: [{ $subtract: ["$$NOW", "$firstOrder"] }, 2592000000] }, 1] }
        ]
      }
    }
  },
  {
    $addFields: {
      clvTier: {
        $switch: {
          branches: [
            { case: { $gte: ["$totalRevenue", 10000] }, then: "High" },
            { case: { $gte: ["$totalRevenue", 3000] }, then: "Medium" }
          ],
          default: "Low"
        }
      }
    }
  },
  {
    $lookup: {
      from: "users",
      localField: "_id",
      foreignField: "_id",
      as: "user"
    }
  },
  { $unwind: "$user" },
  { $sort: { totalRevenue: -1 } }
])
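
To persist the tier on each user document, two stages can be appended. A hedged sketch (whenNotMatched: "discard" skips customers with no user record):

{ $project: { totalRevenue: 1, clvTier: 1 } },
{ $merge: { into: "users", on: "_id", whenMatched: "merge", whenNotMatched: "discard" } }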

Problem: For each product, find all active discount codes that apply to it — matching either by productId OR by category, and only if the discount is not expired.
// Standard $lookup can only match on equality of one field.
// Use pipeline $lookup for complex/conditional joins.

db.products.aggregate([
  { $match: { isActive: true } },
  {
    $lookup: {
      from: "discounts",
      let: { prodId: "$_id", prodCategory: "$category" },  // define variables
      pipeline: [
        {
          $match: {
            $expr: {
              $and: [
                { $gt: ["$expiresAt", "$$NOW"] },       // not expired
                { $eq: ["$isActive", true] },
                {
                  $or: [
                    { $in: ["$$prodId", "$applicableProducts"] },    // product-specific
                    { $in: ["$$prodCategory", "$applicableCategories"] }  // category-wide
                  ]
                }
              ]
            }
          }
        },
        { $project: { code: 1, discountPct: 1, expiresAt: 1 } }
      ],
      as: "activeDiscounts"
    }
  },
  {
    $project: {
      name: 1,
      price: 1,
      category: 1,
      discountCount: { $size: "$activeDiscounts" },
      bestDiscount: { $max: "$activeDiscounts.discountPct" },
      activeDiscounts: 1
    }
  },
  { $match: { discountCount: { $gt: 0 } } }  // only show discounted products
])
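
A hedged follow-up stage computes the effective price after applying the best discount (field names as above; bestDiscount is non-null because zero-discount products were filtered out):

{
  $addFields: {
    bestPrice: {
      $round: [
        { $multiply: ["$price", { $subtract: [1, { $divide: ["$bestDiscount", 100] }] }] },
        2
      ]
    }
  }
}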

Problem: Flag orders as potential fraud using multiple conditions: order amount > 3× customer's average, order placed at unusual hour (2AM–5AM), or shipping address differs from IP country. Score each order with a risk score.
db.orders.aggregate([
  {
    $lookup: {
      from: "customerStats",  // pre-computed avg order value per customer
      localField: "customerId",
      foreignField: "_id",
      as: "stats"
    }
  },
  { $unwind: { path: "$stats", preserveNullAndEmpty: true } },
  {
    $addFields: {
      orderHour: { $hour: { date: "$createdAt", timezone: "UTC" } },  // for output; same-stage fields can't reference it
      riskScore: {
        $add: [
          // High amount risk: +40 if order > 3x customer's avg
          {
            $cond: [
              { $gt: ["$amount", { $multiply: [{ $ifNull: ["$stats.avgOrderValue", 500] }, 3] }] },
              40, 0
            ]
          },
          // Unusual hour: +30 if ordered between 2AM-5AM
          {
            $cond: [
              { $and: [{ $gte: [{ $hour: "$createdAt" }, 2] }, { $lte: [{ $hour: "$createdAt" }, 5] }] },
              30, 0
            ]
          },
          // New account: +20 if account created less than 24h ago
          // (assumes customerCreatedAt is denormalized onto the order document)
          {
            $cond: [
              { $lt: [{ $subtract: ["$createdAt", "$customerCreatedAt"] }, 86400000] },
              20, 0
            ]
          },
          // Country mismatch: +10
          {
            $cond: [{ $ne: ["$shippingCountry", "$ipCountry"] }, 10, 0]
          }
        ]
      }
    }
  },
  {
    $addFields: {
      riskLevel: {
        $switch: {
          branches: [
            { case: { $gte: ["$riskScore", 70] }, then: "HIGH" },
            { case: { $gte: ["$riskScore", 40] }, then: "MEDIUM" }
          ],
          default: "LOW"
        }
      }
    }
  },
  { $match: { riskLevel: { $ne: "LOW" } } },
  { $sort: { riskScore: -1 } }
])
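
To hand flagged orders to a review queue, a $merge stage can be appended. A hedged sketch ("fraudReview" is an assumed collection name):

{
  $merge: {
    into: "fraudReview",
    on: "_id",
    whenMatched: "replace",      // refresh the score on re-evaluation
    whenNotMatched: "insert"
  }
}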

Problem: Blog posts have a tags array that may contain duplicates and mixed-case entries (e.g., "JavaScript", "javascript", "JAVASCRIPT"). Normalize all tags to lowercase and remove duplicates across the entire collection.
// Normalize tags in all documents using aggregation update pipeline (MongoDB 4.2+)
await db.collection('posts').updateMany(
  { tags: { $exists: true, $ne: [] } },
  [
    {
      $set: {
        tags: {
          $setUnion: [  // $setUnion removes duplicates
            {
              $map: {
                input: "$tags",
                as: "tag",
                in: { $toLower: "$$tag" }  // normalize to lowercase
              }
            },
            []  // second array for $setUnion; the union itself dedupes
          ]
        }
      }
    }
  ]
);

// Count tag frequency across all posts
db.posts.aggregate([
  { $unwind: "$tags" },
  { $addFields: { tag: { $toLower: "$tags" } } },
  { $group: { _id: "$tag", postCount: { $sum: 1 } } },
  { $sort: { postCount: -1 } },
  { $limit: 20 }
])

// Find posts that share at least 2 tags  
db.posts.aggregate([
  { $project: { tags: { $map: { input: "$tags", as: "t", in: { $toLower: "$$t" } } } } },
  {
    $lookup: {
      from: "posts",
      let: { myTags: "$tags", myId: "$_id" },
      pipeline: [
        // assumes tags were already normalized by the updateMany above
        { $match: { $expr: { $and: [
          { $ne: ["$_id", "$$myId"] },
          { $gte: [{ $size: { $setIntersection: [{ $ifNull: ["$tags", []] }, "$$myTags"] } }, 2] }
        ] } } }
      ],
      as: "relatedPosts"
    }
  }
])

Problem: Analyze the distribution of product prices using fixed buckets (for display) and auto-computed buckets (for statistical analysis). Identify the price ranges that contain the most products.
// Fixed price buckets (manually defined)
db.products.aggregate([
  { $match: { isActive: true } },
  {
    $bucket: {
      groupBy: "$price",
      boundaries: [0, 100, 500, 1000, 5000, 10000, 50000],
      default: "50000+",
      output: {
        count: { $sum: 1 },
        avgPrice: { $avg: "$price" },
        minPrice: { $min: "$price" },
        maxPrice: { $max: "$price" },
        products: { $push: "$name" }
      }
    }
  }
])

// Auto buckets: MongoDB picks boundaries that spread documents evenly
db.products.aggregate([
  { $match: { isActive: true } },
  {
    $bucketAuto: {
      groupBy: "$price",
      buckets: 5,           // number of buckets
      granularity: "R20",   // ISO standard rounding series
      output: {
        count: { $sum: 1 },
        avgPrice: { $avg: "$price" },
        totalValue: { $sum: "$price" }
      }
    }
  }
])

// Percentile distribution: MongoDB has no $percentRank window operator,
// so derive the rank fraction from $rank and the partition size
db.products.aggregate([
  {
    $setWindowFields: {
      sortBy: { price: 1 },
      output: {
        rank:  { $rank: {} },
        total: { $count: {} }   // default window = the whole partition
      }
    }
  },
  { $addFields: { percentileRank: { $divide: ["$rank", "$total"] } } },
  { $match: { percentileRank: { $gte: 0.9 } } }  // top 10% by price
])
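
MongoDB 7.0 adds $median and $percentile accumulators that return price cut-offs directly. A hedged sketch ("approximate" is the only supported method):

db.products.aggregate([
  { $match: { isActive: true } },
  {
    $group: {
      _id: null,
      medianPrice: { $median: { input: "$price", method: "approximate" } },
      p90p99: { $percentile: { input: "$price", p: [0.9, 0.99], method: "approximate" } }
    }
  }
])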

Problem: When a high-value order (>$5000) is placed or when an order status changes to "refunded", send real-time notifications to the admin dashboard via WebSocket.
const { MongoClient } = require('mongodb');

async function startOrderChangeStream(io) {  // io = Socket.io instance
  const client = await MongoClient.connect(process.env.MONGODB_URI);
  const db = client.db('shopdb');
  
  // Only watch for specific high-value events
  const pipeline = [
    {
      $match: {
        $or: [
          // New high-value order inserted
          {
            operationType: 'insert',
            'fullDocument.amount': { $gt: 5000 }
          },
          // Order status changed to refunded
          {
            operationType: 'update',
            'updateDescription.updatedFields.status': 'refunded'
          }
        ]
      }
    }
  ];
  
  const changeStream = db.collection('orders').watch(pipeline, {
    fullDocument: 'updateLookup'  // include full doc on updates
  });
  
  changeStream.on('change', async (event) => {
    const order = event.fullDocument;
    
    if (event.operationType === 'insert') {
      io.to('admins').emit('highValueOrder', {
        orderId: order._id,
        amount: order.amount,
        customer: order.customerId
      });
    } else if (event.operationType === 'update') {
      io.to('admins').emit('refundAlert', {
        orderId: order._id,
        amount: order.amount
      });
    }
  });
  
  // Reconnect on error
  changeStream.on('error', async (err) => {
    console.error('Change stream error:', err);
    await changeStream.close();
    setTimeout(() => startOrderChangeStream(io), 5000);
  });
  
  return changeStream;
}
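
Change streams are resumable; persisting the resume token lets a restarted process continue where it left off. A hedged sketch ("streamState" is an assumed collection name):

async function watchWithResume(db, pipeline, onChange) {
  const saved = await db.collection('streamState').findOne({ _id: 'orders' });
  const stream = db.collection('orders').watch(pipeline, {
    fullDocument: 'updateLookup',
    ...(saved && { resumeAfter: saved.token })
  });
  stream.on('change', async (event) => {
    await onChange(event);
    await db.collection('streamState').updateOne(
      { _id: 'orders' },
      { $set: { token: event._id } },   // event._id is the resume token
      { upsert: true }
    );
  });
  return stream;
}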

Problem: Implement session expiry for a web app: user sessions should automatically delete after 24 hours of inactivity. Refresh the TTL on every request without manual cleanup jobs.
// Create TTL index — documents expire 24 hours after lastActiveAt
db.sessions.createIndex(
  { lastActiveAt: 1 },
  { expireAfterSeconds: 86400 }  // 24 hours
);

// Create session
async function createSession(userId, req) {
  const sessionId = crypto.randomUUID();
  await db.collection('sessions').insertOne({
    _id: sessionId,
    userId,
    ipAddress: req.ip,
    userAgent: req.headers['user-agent'],
    createdAt: new Date(),
    lastActiveAt: new Date(),  // TTL starts from here
    data: {}
  });
  return sessionId;
}

// Refresh session on every authenticated request (reset TTL)
async function refreshSession(sessionId) {
  const result = await db.collection('sessions').updateOne(
    { _id: sessionId },
    { $set: { lastActiveAt: new Date() } }  // reset the TTL clock
  );
  if (result.matchedCount === 0) {
    throw new Error('Session not found or expired');
  }
}
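
A hedged usage sketch as Express-style middleware (the cookie name is illustrative and assumes cookie-parser is installed):

async function requireSession(req, res, next) {
  try {
    await refreshSession(req.cookies.sessionId);  // also resets the TTL clock
    next();
  } catch (err) {
    res.status(401).json({ error: 'Session expired, please log in again' });
  }
}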

// TTL index with fixed expiry (absolute expiry time field)
// Documents expire AT a specific time stored in the field:
db.passwordResets.createIndex(
  { expiresAt: 1 },
  { expireAfterSeconds: 0 }  // expire when field time is reached
);

// Insert: expires at a specific time
db.passwordResets.insertOne({
  email: "user@example.com",
  token: hashedToken,
  expiresAt: new Date(Date.now() + 15 * 60 * 1000)  // 15 min from now
});

Problem: Store payment card numbers and SSNs in MongoDB such that even a database admin cannot read plaintext values. Use Client-Side Field Level Encryption (CSFLE) with the Node.js driver.
const { MongoClient, ClientEncryption } = require('mongodb');

// Step 1: Create a data encryption key (DEK)
async function createEncryptionKey() {
  const keyVaultClient = new MongoClient(process.env.MONGODB_URI);
  await keyVaultClient.connect();
  
  const encryption = new ClientEncryption(keyVaultClient, {
    keyVaultNamespace: 'encryption.__dataKeys',
    kmsProviders: {
      local: {
        key: Buffer.from(process.env.LOCAL_MASTER_KEY, 'base64')  // 96-byte key
      }
    }
  });
  
  const dataKeyId = await encryption.createDataKey('local', {
    keyAltNames: ['user-pii-key']
  });
  return dataKeyId;
}

// Step 2: Configure encrypted client with JSON schema (schemaMap)
// Note: the fields-array `encryptedFieldsMap` syntax belongs to Queryable
// Encryption; classic CSFLE uses a JSON-Schema-style schemaMap with `encrypt`.
const schemaMap = {
  'mydb.users': {
    bsonType: 'object',
    encryptMetadata: { keyId: [dataKeyId] },  // DEK created in step 1
    properties: {
      ssn:     { encrypt: { bsonType: 'string', algorithm: 'AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic' } },
      cardNum: { encrypt: { bsonType: 'string', algorithm: 'AEAD_AES_256_CBC_HMAC_SHA_512-Random' } }
    }
  }
};

// Deterministic: same plaintext → same ciphertext (allows equality queries)
// Random:        same plaintext → different ciphertext (more secure, no queries)

// Step 3: Use encrypted client — encryption/decryption is automatic
const encryptedClient = new MongoClient(process.env.MONGODB_URI, {
  autoEncryption: {
    keyVaultNamespace: 'encryption.__dataKeys',
    kmsProviders: { local: { key: Buffer.from(process.env.LOCAL_MASTER_KEY, 'base64') } },
    schemaMap
  }
});

// Insert — ssn and cardNum are automatically encrypted
await encryptedClient.db('mydb').collection('users').insertOne({
  name: 'John Doe',
  ssn: '123-45-6789',     // stored as ciphertext
  cardNum: '4111111111111111'  // stored as ciphertext
});
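
Deterministic fields stay queryable by equality, because the driver encrypts the query value the same way. A hedged example:

const user = await encryptedClient.db('mydb').collection('users')
  .findOne({ ssn: '123-45-6789' });  // works: deterministic encryption

// Randomly encrypted fields (cardNum) cannot appear in query filters.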

Problem: Products have a nested categories array where each category has sub-categories (e.g., {main: "Electronics", subs: ["Phones", "Tablets"]}). Generate a flat list of all main+sub category combinations with product counts.
// Product structure: { name, price, categories: [{ main: "Electronics", subs: ["Phones", "Tablets"] }] }

db.products.aggregate([
  { $unwind: "$categories" },          // expand categories array
  { $unwind: "$categories.subs" },     // expand subs array within each category
  {
    $group: {
      _id: {
        main: "$categories.main",
        sub: "$categories.subs"
      },
      productCount: { $sum: 1 },
      avgPrice: { $avg: "$price" },
      products: {
        $push: { id: "$_id", name: "$name", price: "$price" }
      }
    }
  },
  {
    $group: {
      _id: "$_id.main",
      totalProducts: { $sum: "$productCount" },
      subCategories: {
        $push: {
          name: "$_id.sub",
          count: "$productCount",
          avgPrice: { $round: ["$avgPrice", 2] }
        }
      }
    }
  },
  { $sort: { totalProducts: -1 } },
  {
    $project: {
      _id: 0,
      mainCategory: "$_id",
      totalProducts: 1,
      subCategories: {
        $sortArray: { input: "$subCategories", sortBy: { count: -1 } }
      }
    }
  }
])

Problem: Sync a catalogue from an external source: insert new products, update existing ones, and delete products that are no longer in the source — all in one efficient batch operation.
async function syncProductCatalog(externalProducts, clientDb) {
  // Fetch current product IDs from DB
  const existingIds = new Set(
    (await clientDb.collection('products').distinct('externalId')).map(String)
  );
  
  const incomingIds = new Set(externalProducts.map(p => String(p.externalId)));
  const toDelete = [...existingIds].filter(id => !incomingIds.has(id));
  
  const operations = [];
  
  // Insert new + update existing
  for (const product of externalProducts) {
    operations.push({
      updateOne: {
        filter: { externalId: product.externalId },
        update: {
          $set: {
            name: product.name,
            price: product.price,
            stock: product.stock,
            updatedAt: new Date()
          },
          $setOnInsert: { createdAt: new Date() }
        },
        upsert: true
      }
    });
  }
  
  // Delete removed products in a single operation
  if (toDelete.length > 0) {
    operations.push({
      deleteMany: { filter: { externalId: { $in: toDelete } } }
    });
  }
  
  const result = await clientDb.collection('products').bulkWrite(
    operations,
    { ordered: false }  // continue even on partial failure
  );
  
  return {
    inserted: result.upsertedCount,
    updated: result.modifiedCount,
    deleted: result.deletedCount
  };
}

Problem: Orders older than 1 year should be moved to an orders_archive collection to keep the main collection small. Do this without data loss using a safe two-phase approach.
// Phase 1: Copy to archive using $merge (safe — no data deleted yet)
const oneYearAgo = new Date();
oneYearAgo.setFullYear(oneYearAgo.getFullYear() - 1);

await db.collection('orders').aggregate([
  { $match: { createdAt: { $lt: oneYearAgo } } },
  {
    $merge: {
      into: "orders_archive",
      on: "_id",
      whenMatched: "keepExisting",  // don't overwrite already archived docs
      whenNotMatched: "insert"
    }
  }
]).toArray();  // the cursor must be iterated for $merge to execute

// Phase 2: Verify archive count before deleting
const archiveCount = await db.collection('orders_archive').countDocuments({
  createdAt: { $lt: oneYearAgo }
});
const sourceCount = await db.collection('orders').countDocuments({
  createdAt: { $lt: oneYearAgo }
});

if (archiveCount >= sourceCount) {
  // Phase 3: Safe to delete from main collection
  const result = await db.collection('orders').deleteMany({
    createdAt: { $lt: oneYearAgo }
  });
  console.log(`Archived and removed ${result.deletedCount} orders`);
} else {
  throw new Error(`Archive count mismatch! Expected ${sourceCount}, got ${archiveCount}`);
}

// Add index on archive collection for queries
db.orders_archive.createIndex({ createdAt: 1 });

Problem: An IoT device sends temperature readings every 5 seconds. Store in a time-series collection and query: average hourly temperature, detect anomalies (> 2 standard deviations), and compute 1-hour rolling average.
// Create time-series collection (MongoDB 5.0+)
await db.createCollection('sensorReadings', {
  timeseries: {
    timeField: 'timestamp',
    metaField: 'metadata',      // { deviceId, location }
    granularity: 'seconds'      // seconds | minutes | hours
  },
  expireAfterSeconds: 7776000   // auto-expire after 90 days
});

// Insert sensor data
await db.collection('sensorReadings').insertMany([
  { timestamp: new Date(), metadata: { deviceId: 'sensor-01', location: 'factory-floor' }, temperature: 23.5, humidity: 65 },
  // ...
]);

// Query 1: Average hourly temperature per sensor
db.sensorReadings.aggregate([
  { $match: {
    'metadata.deviceId': 'sensor-01',
    timestamp: { $gte: new Date(Date.now() - 86400000) }  // last 24h
  }},
  { $group: {
    _id: { $dateTrunc: { date: "$timestamp", unit: "hour" } },
    avgTemp: { $avg: "$temperature" },
    minTemp: { $min: "$temperature" },
    maxTemp: { $max: "$temperature" },
    readings: { $sum: 1 }
  }},
  { $sort: { _id: 1 } }
])
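
Hours where the device was offline are simply absent from the result; a hedged follow-up using $densify (MongoDB 5.1+) and $fill (5.3+) pads and interpolates those gaps:

{ $densify: { field: "_id", range: { step: 1, unit: "hour", bounds: "full" } } },
{ $fill: { sortBy: { _id: 1 }, output: { avgTemp: { method: "linear" } } } }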

// Query 2: Rolling 1-hour average (window functions)
db.sensorReadings.aggregate([
  { $match: { 'metadata.deviceId': 'sensor-01' } },
  { $sort: { timestamp: 1 } },
  {
    $setWindowFields: {
      partitionBy: "$metadata.deviceId",
      sortBy: { timestamp: 1 },
      output: {
        rollingAvgTemp: {
          $avg: "$temperature",
          window: { range: [-3600, 0], unit: "second" }  // 1 hour window
        },
        stdDev: { $stdDevSamp: "$temperature", window: { range: [-3600, 0], unit: "second" } }
      }
    }
  },
  { $addFields: {
    isAnomaly: { $gt: [
      { $abs: { $subtract: ["$temperature", "$rollingAvgTemp"] } },
      { $multiply: ["$stdDev", 2] }
    ]}
  }},
  { $match: { isAnomaly: true } }
])

Problem: Two users may update the same document simultaneously. Implement optimistic concurrency control using a version number to detect conflicts and retry, preventing lost updates without using transactions.
// Document has a __v (version) field — increment on every update
// If __v doesn't match, another update happened in between

const { ObjectId } = require('mongodb');

async function updateProductPrice(productId, newPrice, expectedVersion, maxRetries = 3) {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    // Only update if version matches — guarantees no intermediate updates
    const result = await db.collection('products').findOneAndUpdate(
      {
        _id: new ObjectId(productId),
        __v: expectedVersion          // version guard
      },
      {
        $set: { price: newPrice, updatedAt: new Date() },
        $inc: { __v: 1 }              // increment version
      },
      { returnDocument: 'after' }
    );
    
    if (result) {       // Node driver v6+: findOneAndUpdate returns the doc (or null)
      return result;    // success
    }
    
    // Conflict! Re-read current version and retry
    const current = await db.collection('products').findOne({ _id: new ObjectId(productId) });
    if (!current) throw new Error('Product not found');
    
    console.log(`Conflict on attempt ${attempt}. Re-reading version ${current.__v}`);
    expectedVersion = current.__v;
    
    // Could apply business logic to merge changes here
    if (attempt === maxRetries) {
      throw new Error(`Optimistic lock conflict after ${maxRetries} attempts`);
    }
    
    await new Promise(r => setTimeout(r, 50 * attempt));  // backoff
  }
}

// Usage: read the current version first, then attempt the guarded write
const doc = await db.collection('products').findOne({ _id: new ObjectId("64a1b2c3d4e5f6789abcdef0") });
const updated = await updateProductPrice("64a1b2c3d4e5f6789abcdef0", 2999.99, doc.__v);
// If someone changed the product between read and write,
// we get a conflict and automatically retry with fresh data

Problem: A social platform where users follow others. Implement a news feed query that returns paginated posts from all followed users, sorted by most recent. Handle the fan-out and performance efficiently.
// Approach 1: Fan-out on read (pull model) — good for users with few follows
async function getFeedPullModel(userId, lastPostId = null, limit = 20) {
  // Get followed user IDs
  const user = await db.collection('users').findOne(
    { _id: userId },
    { projection: { following: 1 } }
  );
  
  const filter = {
    authorId: { $in: user.following },
    isPublic: true
  };
  
  // Cursor-based pagination for stability
  if (lastPostId) {
    const lastPost = await db.collection('posts').findOne({ _id: new ObjectId(lastPostId) });
    filter.createdAt = { $lt: lastPost.createdAt };
  }
  
  return db.collection('posts')
    .find(filter)
    .sort({ createdAt: -1 })
    .limit(limit)
    .toArray();
}

// Support index (equality fields first, then sort: the ESR rule): { authorId: 1, isPublic: 1, createdAt: -1 }

// Approach 2: Fan-out on write (push model) — good for high-traffic feeds
// When a user posts, write to each follower's feed collection
async function createPost(authorId, content) {
  const post = await db.collection('posts').insertOne({
    authorId, content, createdAt: new Date()
  });
  
  // Get followers
  const author = await db.collection('users').findOne({ _id: authorId }, { projection: { followers: 1 } });
  
  // Fan out to each follower's feed (async, via queue in production)
  if (author.followers.length < 10000) {  // only for non-celebrities
    const feedEntries = author.followers.map(followerId => ({
      userId: followerId,
      postId: post.insertedId,
      authorId,
      createdAt: new Date()
    }));
    await db.collection('feeds').insertMany(feedEntries);
  }
}

// Reading feed is now simple and fast:
const feed = await db.collection('feeds')
  .find({ userId: currentUser._id })
  .sort({ createdAt: -1 })
  .limit(20)
  .toArray();
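
Pagination stays cursor-based here too, using the last seen createdAt as the cursor. A hedged sketch, supported by an index on { userId: 1, createdAt: -1 }:

const nextPage = await db.collection('feeds')
  .find({ userId: currentUser._id, createdAt: { $lt: lastSeenCreatedAt } })
  .sort({ createdAt: -1 })
  .limit(20)
  .toArray();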