MongoDB — Interview Queries, Set 1 (20 queries with worked solutions)

Problem: You have an orders collection. Each order has customerId, amount, and createdAt fields. Find the top 5 customers ranked by total spending in the last 30 days.
// Top 5 customers by total spend in the trailing 30 days, joined to their
// customer profile. $match runs first so an index on createdAt can be used.
db.orders.aggregate([
  {
    $match: {
      createdAt: { $gte: new Date(Date.now() - 30 * 24 * 60 * 60 * 1000) }
    }
  },
  {
    $group: {
      _id: "$customerId",
      totalAmount: { $sum: "$amount" },
      orderCount: { $sum: 1 }
    }
  },
  { $sort: { totalAmount: -1 } },
  { $limit: 5 },  // limit BEFORE $lookup so only 5 joins are performed
  {
    $lookup: {
      from: "customers",
      localField: "_id",
      foreignField: "_id",
      as: "customer"
    }
  },
  // NOTE(review): $unwind drops customers whose profile doc is missing —
  // confirm that is acceptable here.
  { $unwind: "$customer" },
  {
    $project: {
      _id: 0,
      customerId: "$_id",
      name: "$customer.name",
      email: "$customer.email",
      totalAmount: 1,
      orderCount: 1
    }
  }
])

Problem: The users collection may have duplicate emails due to a bug. Find all email addresses that appear more than once, along with the count.
// Emails that occur more than once (case-insensitive), with the affected user ids.
db.users.aggregate([
  {
    $group: {
      _id: { $toLower: "$email" },  // normalize case so "A@x" and "a@x" collide
      count: { $sum: 1 },
      userIds: { $push: "$_id" }    // collect ids for later cleanup
    }
  },
  { $match: { count: { $gt: 1 } } },  // keep duplicates only
  { $sort: { count: -1 } },           // worst offenders first
  {
    $project: {
      _id: 0,
      email: "$_id",
      duplicateCount: "$count",
      affectedUserIds: "$userIds"
    }
  }
])

// To delete duplicates (keep the oldest _id):
// The $sort BEFORE $group is essential — it makes $push receive documents in
// _id (≈ insertion) order, so ids[0] really is the oldest. Without it the
// "kept" document is arbitrary. Email is lower-cased to match how duplicates
// were detected above.
db.users.aggregate([
  { $sort: { _id: 1 } },  // ObjectId order approximates creation order
  { $group: { _id: { $toLower: "$email" }, ids: { $push: "$_id" }, count: { $sum: 1 } } },
  { $match: { count: { $gt: 1 } } }
]).forEach(doc => {
  const [, ...duplicateIds] = doc.ids;  // keep the first (oldest), delete the rest
  db.users.deleteMany({ _id: { $in: duplicateIds } });
});

Problem: Generate a month-by-month revenue breakdown for the current year, including total amount, number of orders, and average order value.
// Month-by-month revenue for the current calendar year (completed orders only).
const currentYear = new Date().getFullYear();

db.orders.aggregate([
  {
    $match: {
      status: "completed",
      createdAt: {
        // "YYYY-MM-DD" strings parse as UTC midnight — assumes createdAt is
        // stored in UTC (TODO confirm).
        $gte: new Date(`${currentYear}-01-01`),
        $lt: new Date(`${currentYear + 1}-01-01`)
      }
    }
  },
  {
    $group: {
      _id: { month: { $month: "$createdAt" } },  // 1..12
      totalRevenue: { $sum: "$amount" },
      orderCount: { $sum: 1 },
      avgOrderValue: { $avg: "$amount" }
    }
  },
  { $sort: { "_id.month": 1 } },
  {
    $project: {
      _id: 0,
      month: "$_id.month",
      totalRevenue: { $round: ["$totalRevenue", 2] },
      orderCount: 1,
      avgOrderValue: { $round: ["$avgOrderValue", 2] }
    }
  }
])

Problem: The products collection has all products; orders collection embeds an items array with productId. Find products with zero orders (left join equivalent using $lookup + $match).
// Products that appear in no order — a left-join + "where right side is empty".
db.products.aggregate([
  {
    $lookup: {
      from: "orders",
      localField: "_id",
      foreignField: "items.productId",  // matches inside the embedded items array
      as: "orderRefs"
    }
  },
  { $match: { orderRefs: { $size: 0 } } },  // empty join result = never ordered
  {
    $project: {
      _id: 1,
      name: 1,
      price: 1,
      category: 1,
      createdAt: 1
    }
  },
  { $sort: { createdAt: -1 } }
])

// Alternative: two queries with distinct + $nin. Simpler, but note the
// distinct result set must fit in one 16MB response and a very large $nin
// array scans slowly — prefer the $lookup pipeline for big catalogs.
const orderedProductIds = await db.collection('orders')
  .distinct('items.productId');
const neverOrdered = await db.collection('products')
  .find({ _id: { $nin: orderedProductIds } })
  .toArray();

Problem: Business logic requires marking all status: "pending" orders that are older than 7 days as status: "expired". Also update an expiredAt timestamp.
// Expire stale pending orders in a single server-side bulk update.
const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);

// Update many documents in one operation
const result = await db.collection('orders').updateMany(
  {
    status: "pending",
    createdAt: { $lt: sevenDaysAgo }  // strictly older than 7 days
  },
  {
    $set: {
      status: "expired",
      expiredAt: new Date()  // client-side timestamp; see pipeline variant for server time
    },
    $inc: { __version: 1 }  // optimistic concurrency tracking
  }
);

console.log(`Expired ${result.modifiedCount} orders`);

// With aggregation pipeline update (MongoDB 4.2+): lets the update reference
// existing fields and the server clock ($$NOW) instead of client time.
await db.collection('orders').updateMany(
  { status: "pending", createdAt: { $lt: sevenDaysAgo } },
  [
    {
      $set: {
        status: "expired",
        expiredAt: "$$NOW",  // server-side timestamp
        daysToExpiry: {
          // age of the order in days at the moment it expired (ms / ms-per-day)
          $divide: [{ $subtract: ["$$NOW", "$createdAt"] }, 86400000]
        }
      }
    }
  ]
);

Problem: The orders collection has an items array with productId, category, and quantity. Find the product with the highest total quantity sold in each category.
// Best-selling product per category: sum quantities per (category, product),
// sort descending, then let $first pick the leader inside each category group.
db.orders.aggregate([
  { $match: { status: "completed" } },
  { $unwind: "$items" },  // one document per line item
  {
    $group: {
      _id: { category: "$items.category", productId: "$items.productId" },
      totalQuantity: { $sum: "$items.quantity" },
      productName: { $first: "$items.name" }
    }
  },
  // Sort BEFORE the second $group so $first sees the highest quantity first.
  { $sort: { totalQuantity: -1 } },
  {
    $group: {
      _id: "$_id.category",
      topProduct: {
        $first: {
          productId: "$_id.productId",
          name: "$productName",
          totalQuantity: "$totalQuantity"
        }
      }
    }
  },
  { $sort: { _id: 1 } },
  {
    $project: {
      _id: 0,
      category: "$_id",
      topProduct: 1
    }
  }
])

Problem: Generate a daily user registration report for the past week, showing the date and count of new users registered, ensuring missing days show as 0.
// Daily new-user counts for the past week. Note: days with zero signups are
// simply absent from this output — the $densify variant below fills them.
const sevenDaysAgo = new Date();
sevenDaysAgo.setDate(sevenDaysAgo.getDate() - 7);
sevenDaysAgo.setHours(0, 0, 0, 0);  // snap to local midnight

db.users.aggregate([
  { $match: { createdAt: { $gte: sevenDaysAgo } } },
  {
    $group: {
      _id: {
        $dateToString: { format: "%Y-%m-%d", date: "$createdAt" }  // bucket by calendar day
      },
      count: { $sum: 1 }
    }
  },
  { $sort: { _id: 1 } },  // chronological
  {
    $project: {
      _id: 0,
      date: "$_id",
      newUsers: "$count"
    }
  }
])

// To fill missing days (no registrations), use $densify (MongoDB 5.1+):
// $dateTrunc keeps _id as a real Date so $densify can step it by whole days,
// then $fill supplies count: 0 for the generated gap documents.
// NOTE(review): bounds "full" only spans from the earliest to the latest day
// PRESENT in the data — leading/trailing empty days are still omitted; verify
// this is acceptable or pass explicit bounds.
db.users.aggregate([
  { $match: { createdAt: { $gte: sevenDaysAgo } } },
  { $group: { _id: { $dateTrunc: { date: "$createdAt", unit: "day" } }, count: { $sum: 1 } } },
  { $densify: { field: "_id", range: { step: 1, unit: "day", bounds: "full" } } },
  { $fill: { output: { count: { value: 0 } } } }
])

Problem: The users collection has a points field. Create a leaderboard with proper rank numbering — users with equal scores get the same rank, and the next rank skips accordingly (dense or standard ranking).
// Standard ranking (1, 1, 3 — skip on tie)
// $rank leaves gaps after ties; the leading $sort also fixes the output order.
db.users.aggregate([
  { $sort: { points: -1 } },
  {
    $setWindowFields: {
      sortBy: { points: -1 },  // window ordering used by $rank
      output: {
        rank: { $rank: {} }
      }
    }
  },
  { $limit: 100 },  // top of the leaderboard only
  {
    $project: {
      _id: 0,
      username: 1,
      points: 1,
      rank: 1
    }
  }
])

// Dense ranking (1, 1, 2 — no skip): $denseRank assigns consecutive ranks
// even across ties.
db.users.aggregate([
  { $sort: { points: -1 } },
  {
    $setWindowFields: {
      sortBy: { points: -1 },
      output: { rank: { $denseRank: {} } }
    }
  },
  { $limit: 10 }
])

// User's own rank (efficient — no full sort needed): counting strictly
// higher scores gives standard-ranking position; an index on points makes
// this a cheap range count.
const userPoints = 850;
const rank = await db.collection('users').countDocuments({ points: { $gt: userPoints } });
// rank + 1 = user's position

Problem: Orders have an embedded items array. Find all orders that contain a specific product ID, and include only that product's line item details from the array.
const targetProductId = ObjectId("64a1f2b3c4d5e6f7a8b9c0d1");

// Find orders containing the product.
// $elemMatch in a projection returns only the FIRST matching array element.
db.orders.find(
  { "items.productId": targetProductId },
  { customerId: 1, createdAt: 1, status: 1,
    items: { $elemMatch: { productId: targetProductId } } }
)

// With aggregation — $filter keeps EVERY matching element, not just the first.
db.orders.aggregate([
  { $match: { "items.productId": targetProductId } },  // can use the multikey index below
  {
    $project: {
      customerId: 1,
      createdAt: 1,
      status: 1,
      orderedItem: {
        $filter: {
          input: "$items",
          as: "item",
          cond: { $eq: ["$$item.productId", targetProductId] }
        }
      }
    }
  },
  { $sort: { createdAt: -1 } }
])

// Multikey index to support both queries:
db.orders.createIndex({ "items.productId": 1 })

Problem: Products with price > 5000 should have the tag "premium" added to their tags array. Avoid duplicates — don't add "premium" if it's already there.
// $addToSet prevents duplicates in arrays — a no-op when the value exists.
await db.collection('products').updateMany(
  { price: { $gt: 5000 } },
  { $addToSet: { tags: "premium" } }
);

// Remove a tag from all documents that carry it ($pull removes every occurrence).
await db.collection('products').updateMany(
  { tags: "outdated" },
  { $pull: { tags: "outdated" } }
);

// Add multiple tags to matching documents ($each adds each value individually).
await db.collection('products').updateMany(
  { category: "electronics", price: { $gt: 10000 } },
  {
    $addToSet: {
      tags: { $each: ["premium", "featured", "electronics-flagship"] }
    }
  }
);

// Bulk update with different values per document using bulkWrite.
// NOTE(review): `products` here is an in-memory array from elsewhere — confirm
// each element carries _id and isPopular.
const operations = products.map(p => ({
  updateOne: {
    filter: { _id: p._id },
    update: { $addToSet: { tags: p.isPopular ? "trending" : "standard" } }
  }
}));
await db.collection('products').bulkWrite(operations, { ordered: false });

Problem: Generate a report showing daily sales and the running (cumulative) total of sales for the month, to identify when the monthly target was reached.
// Daily sales plus a running (cumulative) total and a 7-point moving average
// for January 2024.
db.orders.aggregate([
  {
    $match: {
      status: "completed",
      createdAt: {
        $gte: new Date("2024-01-01"),
        $lt: new Date("2024-02-01")
      }
    }
  },
  {
    $group: {
      _id: { $dateToString: { format: "%Y-%m-%d", date: "$createdAt" } },
      dailySales: { $sum: "$amount" }
    }
  },
  { $sort: { _id: 1 } },
  {
    $setWindowFields: {
      sortBy: { _id: 1 },  // "YYYY-MM-DD" strings sort chronologically
      output: {
        cumulativeSales: {
          $sum: "$dailySales",
          window: { documents: ["unbounded", "current"] }  // running total from day 1
        },
        movingAvg7Day: {
          $avg: "$dailySales",
          window: { documents: [-6, "current"] }  // trailing 7 rows — days with no sales are absent, so gaps are NOT counted
        }
      }
    }
  },
  {
    $project: {
      _id: 0,
      date: "$_id",
      dailySales: { $round: ["$dailySales", 2] },
      cumulativeSales: { $round: ["$cumulativeSales", 2] },
      movingAvg7Day: { $round: ["$movingAvg7Day", 2] }
    }
  }
])

Problem: Identify "loyal" customers who placed at least one order in each of the last 3 consecutive months.
// Customers with at least one completed order in each of the last 3 calendar
// months. The window starts on the 1st of the month two months back, so only
// 3 distinct year-months are possible — size >= 3 therefore means "all three".
const now = new Date();
const threeMonthsAgo = new Date(now.getFullYear(), now.getMonth() - 2, 1);

db.orders.aggregate([
  {
    $match: {
      status: "completed",
      createdAt: { $gte: threeMonthsAgo }
    }
  },
  {
    // Dedupe to one row per (customer, month).
    $group: {
      _id: {
        customerId: "$customerId",
        yearMonth: {
          $dateToString: { format: "%Y-%m", date: "$createdAt" }
        }
      }
    }
  },
  {
    $group: {
      _id: "$_id.customerId",
      activeMonths: { $addToSet: "$_id.yearMonth" }
    }
  },
  {
    $match: { $expr: { $gte: [{ $size: "$activeMonths" }, 3] } }
  },
  {
    $lookup: {
      from: "users",
      localField: "_id",
      foreignField: "_id",
      as: "user"
    }
  },
  { $unwind: "$user" },  // drops customers with no matching user doc
  {
    $project: {
      name: "$user.name",
      email: "$user.email",
      activeMonths: 1
    }
  }
])

Problem: When an order is placed for multiple products, decrement each product's stock atomically. Ensure no product goes below 0 (reject if insufficient stock). Use transactions for atomicity.
// Places an order inside a multi-document transaction: each line item's stock
// is decremented only when enough remains, then the order doc is inserted.
// Any shortfall throws and aborts the whole transaction. A caller-supplied
// session is reused; otherwise one is created and torn down here.
async function placeOrder(customerId, items, session) {
  const txSession = session || client.startSession();
  try {
    await txSession.withTransaction(async () => {
      // Conditional decrement: the filter matches only while stock >= quantity,
      // making check-and-decrement a single atomic operation per product.
      for (const item of items) {
        const updateResult = await db.collection('products').updateOne(
          { _id: item.productId, stock: { $gte: item.quantity } },
          { $inc: { stock: -item.quantity } },
          { session: txSession }
        );
        if (updateResult.matchedCount === 0) {
          throw new Error(`Insufficient stock for product ${item.productId}`);
        }
      }

      const totalAmount = items.reduce(
        (sum, { price, quantity }) => sum + price * quantity,
        0
      );
      await db.collection('orders').insertOne(
        {
          customerId,
          items,
          status: "confirmed",
          totalAmount,
          createdAt: new Date()
        },
        { session: txSession }
      );
    });
    console.log("Order placed successfully");
  } finally {
    // Only tear down sessions we created ourselves.
    if (!session) await txSession.endSession();
  }
}

Problem: The employees collection has a salary field. Find the second highest salary value and all employees who earn that salary.
// Method 1: Two-stage approach — find the second-highest DISTINCT salary,
// then fetch everyone earning it.
const salaries = await db.collection('employees').aggregate([
  { $group: { _id: "$salary" } },  // dedupe salaries
  { $sort: { _id: -1 } },
  { $skip: 1 },   // skip the highest
  { $limit: 1 }   // take second
]).toArray();

// Optional chaining guards the "fewer than two distinct salaries" case.
const secondHighest = salaries[0]?._id;
const employees = await db.collection('employees')
  .find({ salary: secondHighest }).toArray();

// Method 2: Using $setWindowFields (elegant) — dense rank 2 is the
// second-highest distinct salary, and ALL employees tied at it are returned.
db.employees.aggregate([
  {
    $setWindowFields: {
      sortBy: { salary: -1 },
      output: {
        denseRank: { $denseRank: {} }
      }
    }
  },
  { $match: { denseRank: 2 } },
  { $project: { _id: 0, name: 1, salary: 1, department: 1 } }
])

// Method 3: Single query — group to distinct salaries, sort, collect, index 1.
// (The previous version deduped with $setUnion, but $setUnion makes NO
// ordering guarantee, so $arrayElemAt could pick an arbitrary salary. $push
// after $sort preserves the sorted order.)
db.employees.aggregate([
  { $group: { _id: "$salary" } },          // one doc per distinct salary
  { $sort: { _id: -1 } },                  // highest first
  { $group: { _id: null, salaries: { $push: "$_id" } } },
  { $project: { second: { $arrayElemAt: ["$salaries", 1] } } }
])

Problem: Generate a category-wise sales report showing total revenue per category and each category's percentage contribution to total revenue.
// Revenue per category plus each category's share of total revenue.
// Pattern: group per category, then group-to-null to compute the grand total,
// then $unwind back to one row per category so the percentage can be derived.
db.orders.aggregate([
  { $match: { status: "completed" } },
  { $unwind: "$items" },
  {
    $group: {
      _id: "$items.category",
      categoryRevenue: { $sum: { $multiply: ["$items.price", "$items.quantity"] } },
      itemsSold: { $sum: "$items.quantity" }
    }
  },
  { $sort: { categoryRevenue: -1 } },
  {
    $group: {
      _id: null,
      categories: {
        $push: {
          category: "$_id",
          revenue: "$categoryRevenue",
          itemsSold: "$itemsSold"
        }
      },
      grandTotal: { $sum: "$categoryRevenue" }  // total across all categories
    }
  },
  { $unwind: "$categories" },
  {
    $project: {
      _id: 0,
      category: "$categories.category",
      revenue: { $round: ["$categories.revenue", 2] },
      itemsSold: "$categories.itemsSold",
      percentage: {
        $round: [
          { $multiply: [{ $divide: ["$categories.revenue", "$grandTotal"] }, 100] },
          2
        ]
      }
    }
  },
  { $sort: { revenue: -1 } }
])

Problem: The users collection has createdAt and lastLoginAt fields. Find users who registered more than 7 days ago but have never logged in (lastLoginAt is null or equals createdAt).
const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);

// Find dormant users: registered more than 7 days ago and never logged in.
// { lastLoginAt: null } already matches BOTH explicit null and a missing
// field; the $expr clause covers the stated "lastLoginAt equals createdAt"
// case (timestamp initialised at signup but never advanced).
const dormantUsers = await db.collection('users').find({
  createdAt: { $lt: sevenDaysAgo },
  $or: [
    { lastLoginAt: null },
    { lastLoginAt: { $exists: false } },
    { $expr: { $eq: ["$lastLoginAt", "$createdAt"] } }
  ],
  isActive: true
}).project({ name: 1, email: 1, createdAt: 1 }).toArray();

// Count never-logged-in users by registration cohort (week-of-year of signup).
// The $or mirrors the find() above and also covers the spec's
// "lastLoginAt equals createdAt" case.
db.users.aggregate([
  {
    $match: {
      createdAt: { $lt: sevenDaysAgo },
      $or: [
        { lastLoginAt: null },
        { lastLoginAt: { $exists: false } },
        { $expr: { $eq: ["$lastLoginAt", "$createdAt"] } }
      ]
    }
  },
  {
    $group: {
      _id: {
        week: { $week: "$createdAt" },
        year: { $year: "$createdAt" }
      },
      neverLoggedIn: { $sum: 1 }
    }
  },
  { $sort: { "_id.year": -1, "_id.week": -1 } }
])

Problem: Implement stable pagination for comments sorted by likes descending, then by _id descending (for tie-breaking). Support both page-number and cursor-based approaches.
// Cursor-based pagination (efficient, recommended): resumes from the last
// (likes, _id) pair instead of skipping N documents.
// First page
const firstPage = await db.collection('comments')
  .find({ postId: targetPostId })
  .sort({ likes: -1, _id: -1 })
  .limit(20)
  .toArray();

// Next page — cursor after last item. The $or encodes tuple comparison
// (likes, _id) < (lastLikes, lastId) under a descending sort, which keeps
// tie-broken ordering stable.
const lastItem = firstPage[firstPage.length - 1];
const nextPage = await db.collection('comments').find({
  postId: targetPostId,
  $or: [
    { likes: { $lt: lastItem.likes } },
    { likes: lastItem.likes, _id: { $lt: lastItem._id } }
  ]
}).sort({ likes: -1, _id: -1 }).limit(20).toArray();

// Compound index matching the sort so each page is an index scan.
db.comments.createIndex({ postId: 1, likes: -1, _id: -1 })

// Express route implementation.
// Fixes vs the naive version: explicit undefined checks (a "0" cursor value
// is falsy), parseInt with radix, `new` with ObjectId (required by modern
// drivers), and a nextCursor that carries BOTH sort keys — _id alone cannot
// resume a likes-ordered scan.
app.get('/api/posts/:postId/comments', async (req, res) => {
  const { afterLikes, afterId } = req.query;
  const filter = { postId: req.params.postId };
  if (afterLikes !== undefined && afterId !== undefined) {
    const likes = Number.parseInt(afterLikes, 10);
    filter.$or = [
      { likes: { $lt: likes } },
      { likes, _id: { $lt: new ObjectId(afterId) } }
    ];
  }
  const comments = await db.collection('comments')
    .find(filter).sort({ likes: -1, _id: -1 }).limit(20).toArray();
  const last = comments[comments.length - 1];
  res.json({
    comments,
    // null signals "no further pages" when the last page came back short.
    nextCursor: comments.length === 20
      ? { afterLikes: last.likes, afterId: last._id }
      : null
  });
});

Problem: Implement a thread-safe sequential ID generator (like an auto-increment counter) using MongoDB. Multiple concurrent requests must each get a unique, sequential number.
// Counter collection approach — findOneAndUpdate is atomic, so concurrent
// callers can never observe the same value.
async function getNextSequenceValue(sequenceName) {
  const result = await db.collection('counters').findOneAndUpdate(
    { _id: sequenceName },
    { $inc: { seq: 1 } },
    {
      returnDocument: 'after',  // return the already-incremented document
      upsert: true              // create the counter on first use
    }
  );
  // Driver compatibility: Node driver v6+ returns the document itself, while
  // v4/v5 wrap it as { value: doc }. Handle both shapes.
  return (result.value ?? result).seq;
}

// Usage — each call atomically reserves the next number in its sequence.
const invoiceNumber = await getNextSequenceValue('invoices');  // 1001
const orderId = await getNextSequenceValue('orders');           // 5001

// Initialize counters with starting values.
// NOTE(review): run this seeding once, before first use — insertMany will
// throw duplicate-key errors if the counters already exist.
await db.collection('counters').insertMany([
  { _id: 'invoices', seq: 1000 },
  { _id: 'orders',   seq: 5000 },
  { _id: 'tickets',  seq: 0 }
]);

// Thread-safe — findOneAndUpdate is atomic
// 1000 concurrent requests all get unique numbers
// No two requests ever get the same value

Problem: Segment customers into "new" (first order ever) and "returning" (second order or more) and compare their average order values to understand purchasing behavior differences.
// Compare average order value of first-ever orders vs repeat orders.
// $documentNumber numbers each customer's completed orders chronologically;
// order #1 is tagged "new_customer", everything later "returning_customer".
db.orders.aggregate([
  { $match: { status: "completed" } },
  { $sort: { customerId: 1, createdAt: 1 } },
  {
    $setWindowFields: {
      partitionBy: "$customerId",
      sortBy: { createdAt: 1 },
      output: {
        orderNumber: { $documentNumber: {} }  // 1st, 2nd, 3rd order per customer
      }
    }
  },
  {
    $addFields: {
      segment: {
        $cond: [{ $eq: ["$orderNumber", 1] }, "new_customer", "returning_customer"]
      }
    }
  },
  {
    $group: {
      _id: "$segment",
      avgOrderValue: { $avg: "$amount" },
      totalOrders: { $sum: 1 },
      totalRevenue: { $sum: "$amount" }
    }
  },
  {
    $project: {
      _id: 0,
      segment: "$_id",
      avgOrderValue: { $round: ["$avgOrderValue", 2] },
      totalOrders: 1,
      totalRevenue: { $round: ["$totalRevenue", 2] }
    }
  }
])

Problem: You receive a daily batch of 10,000 user profile records from an external API. Upsert them into MongoDB — update existing users and insert new ones — as efficiently as possible.
// Bulk-upserts a batch of external user profiles in fixed-size chunks.
// updateOne+upsert keyed on externalId: existing users get their profile
// fields refreshed; brand-new users additionally receive the $setOnInsert
// defaults. Unordered writes keep going past individual failures.
// Returns { upserted, modified } totals across all chunks.
async function bulkUpsertUsers(userBatch) {
  const BATCH_SIZE = 1000;
  const results = { upserted: 0, modified: 0 };

  for (let start = 0; start < userBatch.length; start += BATCH_SIZE) {
    const operations = userBatch
      .slice(start, start + BATCH_SIZE)
      .map((user) => ({
        updateOne: {
          filter: { externalId: user.externalId },  // unique external ID
          update: {
            $set: {
              name: user.name,
              email: user.email,
              phone: user.phone,
              updatedAt: new Date()
            },
            $setOnInsert: {
              // applied only when the upsert inserts a new document
              createdAt: new Date(),
              isActive: true,
              plan: 'free'
            }
          },
          upsert: true  // insert when no document matches
        }
      }));

    const bulkResult = await db
      .collection('users')
      .bulkWrite(operations, { ordered: false });  // don't stop on individual errors

    results.upserted += bulkResult.upsertedCount;
    results.modified += bulkResult.modifiedCount;
  }

  return results;
}

// Example driver call — externalUsers is the day's batch from the external API.
const result = await bulkUpsertUsers(externalUsers);
console.log(`Upserted: ${result.upserted}, Modified: ${result.modified}`);