db.data.drop()
db.data.insertOne({"name": "val1"})
db.data.insertOne({"_id": "id100", "name": "val1"})
db.data.insertMany([
    { "name": "val1" },
    { "name": "val2" }
])
db.data.insertMany([
    { "name": "val1" },
    { "name": "val2" }
], { "ordered": false })
_id is of type ObjectId() by default
12 bytes (shown as a 24-char hex string): 4-byte timestamp + 3-byte machine id + 2-byte process id + 3-byte counter
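// e.g. the creation time embedded in the first 4 bytes can be read back in the shell:
var id = ObjectId()
id.getTimestamp() // ISODate with the creation time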
db.data.find()
db.data.find().pretty()
db.data.find({"rated": "PG-13"})
db.data.find({"rated": "PG-13"}).count()
db.data.find({"rated": "PG-13", "year": 2009})
db.data.find({"tomato.meter": 100})
db.data.find({"writers": ["Ethan", "Joel"]}) // MATCH ENTIRE ARRAY
db.data.find({"actors": "Brad"}) // MATCH ANY ELEMENT OF ARRAY
db.data.find({"actors.0": "Brad"}) // MATCH FIRST ELEMENT OF ARRAY
// cursor
var c = db.data.find()
var doc = function() { return c.hasNext() ? c.next() : null; }
doc()
c.objsLeftInBatch()
// projection
db.data.find({"rated": "PG-13"}, { title: 1, _id: 0 })
// query operators
// comparison: $eq, $ne, $lt, $lte, $gt, $gte, $in, $nin
db.data.find({"rated": { $ne: "UNRATED" }})
db.data.find({"runtime": { $gt: 90 }})
db.data.find({"runtime": { $gte: 90, $lte: 120 }})
db.data.find({"tomato.meter": {$gt: 95}, "runtime": { $gte: 90, $lte: 120 }})
db.data.find({"rated": { $in: ["G", "PG"] }})
// element: $exists, $type
db.data.find({"tomato.meter": { $exists: true }}).count()
db.data.find({"tomato.meter": { $exists: false }}).count()
db.data.find({"_id": { $type: "string" }})
// logical operators: $or, $and, $not, $nor
db.data.find({ $or: [ {"tomato.meter": { $gte: 95 }}, {"metacritic": { $gt: 88 }} ] })
db.data.find({ $and: [ {"metacritic": { $ne: null }}, {"metacritic": { $exists: true }} ] })
// regex
db.data.find({ "awards.text": { $regex: /^Won\s.*/ } })
db.data.find({ "awards.text": { $regex: '^Won\\s.*' } })
db.data.find({ "awards.text": { $regex: '^Won\\s.*', $options: "i" } })
// array operators: $all, $size, $elemMatch
{ "genre": { $all: ["Comedy", "Crime", "Drama"] } }
{ "country": { $size: 1 } }
{ "boxoffice": { $elemMatch: { "country": "UK", "revenue": { $gt: 15 } } } }
db.movieDetails.find({'awards.oscars.award': 'bestPicture'})
db.movieDetails.updateMany({ year: {$gte: 2010, $lte: 2013},
                             "imdb.votes": {$lt: 10000},
                             $and: [{"tomato.consensus": {$exists: true}},
                                    {"tomato.consensus": null}] },
                           { $unset: { "tomato.consensus": "" } });
// update
db.data.updateOne({"title": "The Martian"}, {$set: {"poster": "http://d.c"}})
db.data.updateOne({"title": "The Martian"}, {$set: {"awards": {"wins": 0, "nominations": 2}}})
// update operators
// $set, $unset, $rename, $min, $max, $inc, $mul, ...
db.data.updateOne({"title": "Martian"}, {$inc: {"tomato.reviews": 4, "tomato.userReviews": 3}})
// array update operators: $addToSet, $pop, $pull, $pullAll, $push
db.data.updateOne(
    {"title": "Martian"},
    { $push: { "reviews":
        { "rating": 1, "author": "Someone", "text": ".."}
    } }
)
db.data.updateOne(
    {"title": "Martian"},
    { $push: { "reviews":
        { $each: [
            { "rating": 1, "author": "Someone", "text": ".."},
            { "rating": 2, "author": "Someone2", "text": ".."}
        ] }
    } }
)
// update many
db.data.updateMany({"rated": null}, {$set: {"rated": "UNRATED"} })
db.data.updateMany({"rated": null}, {$unset: {"rated": ""} })
// upsert
db.data.update({...}, {...}, {upsert: true})
// replaceOne
db.data.replaceOne()
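// replaceOne swaps the whole document (except _id) for the given replacement; a minimal sketch (field values are made up):
db.data.replaceOne({"title": "Martian"}, {"title": "Martian", "year": 2015, "rated": "PG-13"})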
cursor modifiers are applied server-side in a fixed order: SORT -> SKIP -> LIMIT
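// i.e. no matter how the cursor methods are chained, both of these return the same documents:
db.data.find().sort({"year": -1}).skip(10).limit(5)
db.data.find().limit(5).skip(10).sort({"year": -1})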
HOMEWORK
1:
db.grades.find({'score': {$gte: 65}}, {'_id': 0, 'student_id': 1}).sort({'score': 1}).limit(1)
2:
```python
import pymongo

client = pymongo.MongoClient()
grades = client.students.grades

last_student = None
for doc in grades.find({'type': 'homework'}).sort([('student_id', pymongo.ASCENDING), ('score', pymongo.ASCENDING)]):
    if last_student != doc['student_id']:
        # first (lowest-scored) homework doc for this student: remove it
        last_student = doc['student_id']
        grades.delete_one({'_id': doc['_id']})
```
3:
ch4:
// hint: force use of a particular index
db.students.find({student_id: {$gt: 500000}, class_id: 54}).sort({student_id: 1}).hint({class_id: 1}).explain("executionStats")
// profiling
db.getProfilingStatus()
db.getProfilingLevel()
db.setProfilingLevel(0) // do not log queries
db.setProfilingLevel(1, 4) // log queries slower than 4 ms (slowms)
db.setProfilingLevel(2) // log all queries
db.system.profile.find()
db.system.profile.find().sort({ts: 1})
db.system.profile.find({ns: /school.students/})
db.system.profile.find({millis: {$lt: 1}}, {ts: 1, millis: 1}).sort({ts: -1})
mongotop
mongotop 5 // refresh every 5 seconds
mongostat
storage engines
MMAPv1:
  collection-level locking
  in-place updates
  power-of-two record allocation
WiredTiger:
  document-level concurrency (optimistic, no locking needed)
  compression (of document data and indexes)
  append-only (no in-place updates)
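// the active storage engine can be checked at runtime:
db.serverStatus().storageEngine // e.g. { "name" : "wiredTiger", ... }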
killall mongod
mkdir WT
mongod --dbpath WT --storageEngine wiredTiger
mongod --dbpath WT --storageEngine wiredTiger --wiredTigerIndexPrefixCompression true
db.foo.stats()
indexes: B-tree
db.students.createIndex({student_id: 1})
db.students.createIndex({student_id: 1, class_id: -1})
db.students.createIndex({location: '2d'})
db.students.createIndex({location: '2dsphere'})
// full text search
db.students.createIndex({location: 'text'})
db.data.find({'$text': {'$search': 'dog cat'}}) // matches documents containing 'dog' OR 'cat'
db.students.getIndexes()
db.students.dropIndex({student_id: 1})
// multikey indexes - created automatically when the indexed field is an array
db.students.createIndex({student_id: 1}, {'unique': true})
db.students.createIndex({student_id: 1}, {'unique': true, 'sparse': true}) // sparse - documents missing the field are left out of the index
db.students.createIndex({student_id: 1}, {'background': true})
foreground index creation:
  fast, but blocks all reads/writes on the whole DB (all collections)
background index creation:
  slower, but does not block
explain works with find, update, remove, aggregate (won't work with insert)
db.students.explain().find({student_id: 5}) // COLLSCAN, docsExamined
db.students.explain(true).find({student_id: 5}) // COLLSCAN, docsExamined
var exp = db.students.explain()
exp.help()
exp.find({'name': 'noo'}).sort({'name': 1})
explain verbosity:
  queryPlanner // default
  executionStats
  allPlansExecution
var exp = db.students.explain('executionStats')
var exp = db.students.explain('allPlansExecution')
covered query: the query is satisfied entirely by an index (no documents need to be inspected)
NO  db.ijk_data.find({'i': 10, 'k': 20})                                     // returns whole documents
NO  db.ijk_data.find({'i': 10, 'k': 20}, {'_id': 0})                         // fields outside the index may still be returned
YES db.ijk_data.find({'i': 10, 'k': 20}, {'_id': 0, 'i': 1, 'j': 1, 'k': 1})
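// sketch: with a compound index on i, j, k the YES query above is covered - explain shows totalDocsExamined: 0
db.ijk_data.createIndex({'i': 1, 'j': 1, 'k': 1})
db.ijk_data.explain('executionStats').find({'i': 10, 'k': 20}, {'_id': 0, 'i': 1, 'j': 1, 'k': 1})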
db.examples.stats()
db.examples.totalIndexSize()
db.students.find({'scores': { $elemMatch: {'type': 'exam', 'score': {$gt: 98}} } })
db.stores.find({ loc: { $near: { $geometry: { type: "Point", coordinates: [-130, 39] }, $maxDistance: 1000000 } } })
for (i = 0; i < 10; i++) { db.ex.insert({'a': i*i}) }
for (i = 0; i < 10; i++) { x = []; for (j = 0; j < 10; j++) { x.push({"key": i, "val": j}); }; db.ex.insert({'arr': x}); }
aggregation pipeline:
  $project (reshape doc)
  $match
  $group
  $sort (100 MB limit for in-memory sort, or disk-based sort)
  $skip (after sort)
  $limit (after sort)
  $unwind (normalize/explode array data)
  $out (redirect result to a collection)
  $redact
  $geoNear
$group:
  $sum $avg $min $max
  $push $addToSet (for building arrays)
  $first $last (only useful with $sort)
explain: db.zips.aggregate([...], {explain: true})
allowDiskUse (100 MB limit for in-memory aggregation): db.zips.aggregate([...], {allowDiskUse: true})
cursor (initial batch size): db.zips.aggregate([...], {cursor: {batchSize: 10}})
db.products.aggregate([{$group: {_id: "$manufacturer", num_products: {$sum: 1}}}])
db.products.aggregate([{$group: {_id: {"cat": "$category"}, num_products: {$sum: 1}}}])
db.products.aggregate([{$group: {_id: {"maker": "$manufacturer", "cat": "$category"}, num_products: {$sum: 1}}}])
db.products.aggregate([{$group: {_id: {"maker": "$manufacturer"}, sum_price: {$sum: "$price"}}}])
db.zips.aggregate([{$group: { _id: "$state", "population": {$sum: "$pop"}}}])
db.zips.aggregate([{$group: { _id: "$_id", "average_pop": {$avg: "$pop"}}}])
db.zips.aggregate([{"$group": {"_id": "$state", "average_pop": {"$avg": "$pop"}}}])
db.zips.aggregate([{$group: {_id: "$city", "postal_codes": {$addToSet: "$_id"}}}])
db.zips.aggregate([{$group: {_id: "$state", "pop": {$max: "$pop"}}}])
db.grades.aggregate([
    {'$group': {_id: {class_id: "$class_id", student_id: "$student_id"}, 'average': {"$avg": "$score"}}},
    {'$group': {_id: "$_id.class_id", 'average': {"$avg": "$average"}}}])
db.products.aggregate([
    {$project: {
        _id: 0,
        'maker': {$toLower: "$manufacturer"},
        'details': {'category': "$category",
                    'price': {"$multiply": ["$price", 10]}
        },
        'item': '$name'
    }}
])
db.zips.aggregate([{$project: {_id: 0, "city": {$toLower: "$city"}, "pop": 1, "state": 1, "zip": "$_id"}}])
db.zips.aggregate([
    {$match:
        {
            state: "NY"
        }
    },
    {$group:
        {
            _id: "$city",
            population: {$sum: "$pop"},
            zip_codes: {$addToSet: "$_id"}
        }
    },
    {$project:
        {
            _id: 0,
            city: "$_id",
            population: 1,
            zip_codes: 1
        }
    }
])
db.zips.aggregate([{$match: {pop: {$gt: 100000}}}])
db.zips.aggregate([
    {$match: {"state": "NY"}},
    {$group: {_id: "$city", "pop": {$sum: "$pop"}}},
    {$project: {_id: 0, "city": "$_id", pop: 1}},
    {$sort: {pop: -1}} ])
db.zips.aggregate([
    {$match: {pop: {$gt: 100000}}},
    {$skip: 4},
    {$limit: 10}
])
use agg
db.zips.aggregate([
    /* get the population of every city in every state */
    {$group:
        {
            _id: {state: "$state", city: "$city"},
            population: {$sum: "$pop"}
        }
    },
    /* sort by state, population */
    {$sort:
        {"_id.state": 1, "population": -1}
    },
    /* group by state, get the first item in each group */
    {$group:
        {
            _id: "$_id.state",
            city: {$first: "$_id.city"},
            population: {$first: "$population"}
        }
    }
])
db.posts.aggregate([
    /* unwind by tags */
    {"$unwind": "$tags"},
    /* now group by tags, counting each tag */
    {"$group":
        {"_id": "$tags",
         "count": {$sum: 1}
        }
    },
    /* sort by popularity */
    {"$sort": {"count": -1}},
    /* show me the top 10 */
    {"$limit": 10},
    /* change the name of _id to be tag */
    {"$project":
        {_id: 0,
         'tag': '$_id',
         'count': 1
        }
    }
])
use agg
db.inventory.drop();
db.inventory.insert({'name': "Polo Shirt", 'sizes': ["Small", "Medium", "Large"], 'colors': ['navy', 'white', 'orange', 'red']})
db.inventory.insert({'name': "T-Shirt", 'sizes': ["Small", "Medium", "Large", "X-Large"], 'colors': ['navy', "black", 'orange', 'red']})
db.inventory.insert({'name': "Chino Pants", 'sizes': ["32x32", "31x30", "36x32"], 'colors': ['navy', 'white', 'orange', 'violet']})
db.inventory.aggregate([
    {$unwind: "$sizes"},
    {$unwind: "$colors"},
    {$group:
        {
            '_id': {'size': '$sizes', 'color': '$colors'},
            'count': {'$sum': 1}
        }
    }
])
db.products.aggregate([
    {$group: {_id: "$manufacturer", num_products: {$sum: 1}}}, // output _id must be unique
    {$out: 'collection_name'} // will drop the target collection if it exists !!!
])
// hw5.1: find the author with the most comments
db.posts.aggregate([
    {$project: {"_id": 0, "authors": "$comments.author"}},
    {$unwind: "$authors"},
    {$group: {"_id": "$authors", 'sum': {$sum: 1}}},
    {$sort: {"sum": -1}}
])
// hw5.2: find the avg population of cities in CA and NY with a population over 25000
db.zips.aggregate([
    {$match: {"state": {$in: ["CA", "NY"]} }},
    {$group: { _id: { "state": "$state", "city": "$city"}, "pop": {$sum: "$pop"} }},
    {$match: {"pop": {$gt: 25000}}},
    {$group: {"_id": null, "avg_pop": {$avg: "$pop"}}}
])
// hw5.3: class with the highest avg student score (excluding quizzes)
db.grades.aggregate([
    {$project: { _id: 0, "class_id": 1, "student_id": 1, "scores": 1}},
    {$unwind: "$scores"},
    {$match: {"scores.type": {$ne: "quiz"}}},
    {$group: {"_id": {"class_id": "$class_id", "student_id": "$student_id"}, "avg_score": {$avg: "$scores.score"}}},
    {$group: {"_id": "$_id.class_id", "avg_scores": {$avg: "$avg_score"}}},
    {$sort: {"avg_scores": -1}}
])
// hw5.4: find the number of people who live in a city whose name starts with a digit
db.zips.aggregate([
    {$match: {"city": {$regex: /^[0-9]/}}},
    {$group: {_id: null, "pop_sum": {$sum: "$pop"}}}
])
write concern:
  w: 1 (how many nodes must acknowledge the write)
  j: false (do not wait for the journal to be flushed to disk)
  wtimeout: how long to wait for the acknowledgement
  (all of them may be set per connection, per collection, or in the replica set config)
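// e.g. a per-operation write concern in the shell (the values here are illustrative):
db.data.insertOne({"name": "val1"}, {writeConcern: {w: "majority", j: true, wtimeout: 5000}})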
replica set:
  1 node is primary (writes and reads), the other nodes are secondaries (reads only)
  replication is asynchronous
  if the primary goes down, one of the secondary nodes becomes primary (election)
  minimal number of nodes in a replica set: 3
types of nodes in a replica set (see the rs.add() sketch below):
  - regular (has data, can become primary)
  - arbiter (only for voting purposes)
  - delayed (disaster recovery node, set X hours behind the other nodes, cannot become primary)
  - hidden (e.g. for analytics, cannot become primary)
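// a sketch of how the node types map to member config options (hosts and _ids are placeholders):
rs.add({ _id: 3, host: "localhost:27020", arbiterOnly: true }) // arbiter
rs.add({ _id: 4, host: "localhost:27021", priority: 0, hidden: true }) // hidden
rs.add({ _id: 5, host: "localhost:27022", priority: 0, hidden: true, slaveDelay: 3600 }) // delayed by 1 hour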
strong consistency:
  read and write only from the primary node
eventual consistency:
  write to the primary, but read from secondaries
  you will eventually read what you wrote, but possibly with a small delay
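// to actually read from a secondary the client must opt in, e.g. in the shell:
rs.slaveOk() // per-connection: allow reads on a secondary
db.getMongo().setReadPref('secondaryPreferred')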
// create replica set
#!/usr/bin/env bash
mkdir -p /data/rs1 /data/rs2 /data/rs3
mongod --replSet m101 --logpath "1.log" --dbpath /data/rs1 --port 27017 --oplogSize 64 --smallfiles --fork
mongod --replSet m101 --logpath "2.log" --dbpath /data/rs2 --port 27018 --oplogSize 64 --smallfiles --fork
mongod --replSet m101 --logpath "3.log" --dbpath /data/rs3 --port 27019 --oplogSize 64 --smallfiles --fork
// init replica set
config = { _id: "m101", members: [
    { _id: 0, host: "localhost:27017" },
    { _id: 1, host: "localhost:27018" },
    { _id: 2, host: "localhost:27019" } ]
};
rs.initiate(config);
rs.status()
rs.config() // show rs config
rs.slaveOk() // allow reads from a secondary node
rs.stepDown() // ask the primary to step down, triggering an election
rs.help()
each rs node has an oplog (secondaries constantly replay the primary's oplog)
use local
show collections
db.oplog.rs.find()
after a failover, a returning node reconnects to the primary and catches up from the oplog; a node that is brand new (or too far behind) copies the entire dataset (initial sync)
proper handling of failover:
insert: catch AutoReconnect errors: if AutoReconnect occurs, sleep X seconds and retry the operation
import time
import pymongo

client = pymongo.MongoClient()
things = client.test.things  # db/collection names as in the course example

for i in range(100):
    for retry in range(3):
        try:
            things.insert_one({'_id': i})
            print("Inserted Document: " + str(i))
            time.sleep(.1)
            break
        except pymongo.errors.AutoReconnect as e:
            print("Exception", type(e), e)
            print("Retrying..")
            time.sleep(5)
        except pymongo.errors.DuplicateKeyError as e:
            # the first attempt reached the server before the failover
            print("duplicate..but it worked")
            break
read: retry
update:
  - idempotent ($set): retry
  - non-idempotent ($inc, $push): depends on the application (see the sketch below)
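// sketch of why a $set retry is safe but an $inc retry is not (hypothetical doc):
db.data.updateOne({"_id": 1}, {$set: {"views": 5}}) // replayed after AutoReconnect: still 5
db.data.updateOne({"_id": 1}, {$inc: {"views": 1}}) // replayed: incremented twice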
read preference:
  - Primary
  - PrimaryPreferred
  - Secondary (only secondaries)
  - SecondaryPreferred
  - Nearest (closest ping time)
sharding
mongos - router in front of the shards
shard key:
- every doc must include the shard key
- the shard key is immutable
- the shard key must be indexed (a non-multikey index that starts with the shard key)
- an update must specify the shard key (or set multi: true)
- queries that do not include the shard key are sent to all shards (scatter-gather)
- a unique index is not allowed (unless it starts with the shard key)
types of sharding (see the sh.shardCollection() sketch below):
- range-based (s1: 1..100, s2: 101..200, ...)
- hash-based (hashing over the shard key)
sh.status()
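// a minimal sketch of enabling each type (db/collection names are placeholders):
sh.enableSharding("school")
sh.shardCollection("school.students", { student_id: 1 }) // range-based
sh.shardCollection("school.logs", { host: "hashed" }) // hash-based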
# final exam
## Q1
```javascript
db.messages.find({'headers.From': '[email protected]', 'headers.To': '[email protected]'}, {'headers.From': 1, 'headers.To': 1})
```
## Q2
```javascript
// wrong
//db.messages.aggregate( {$project: {'_id': 0, 'From': '$headers.From', 'To': '$headers.To'}}, {$unwind: '$To'}, {'$group': {'_id': {'From': '$From', 'To': '$To'}, 'sum': {$sum: 1} }}, {$sort: {"sum": -1}} )
// correct
db.messages.aggregate( {$project: {'From': '$headers.From', 'To': '$headers.To'}}, {$unwind: '$To'}, {$group: {_id: {'Msg': '$_id', 'From': '$From'}, 'To': {$addToSet: '$To'}}}, {$unwind: '$To'}, {'$group': {'_id': {'From': '$_id.From', 'To': '$To'}, 'sum': {$sum: 1} }}, {$sort: {"sum": -1}} )
```
## Q3
```javascript
// get the 'To' count
db.messages.aggregate( {$match: {'headers.Message-ID': '<8147308.1075851042335.JavaMail.evans@thyme>'}}, {$project: {'From': '$headers.From', 'To': '$headers.To'}}, {$unwind: '$To'}, {$group: {'_id': null, 'sum': {$sum: 1}}} )
// update
db.messages.update({'headers.Message-ID': '<8147308.1075851042335.JavaMail.evans@thyme>'}, {$push: {'headers.To': '[email protected]'}})
```
## Q4
```python
try:
    self.posts.update_one({'permalink': permalink}, {'$inc': {"comments.{ord}.num_likes".format(ord=comment_ordinal): 1}})
except Exception as err:
    print('Error INC likes. Error: {err}'.format(err=err))
```