NoSQL
MongoDB
https://www.itread01.com/content/1541437023.html Hbase
https://www.infoq.cn/article/JAl9mSCpyS8vX80TYc4D
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html
https://stackoverflow.com/questions/41498672/how-to-convert-an-500gb-sql-table-into-apache-parquet
Apache Arrow / Parquet 是 file-based columnar store,可以只讀取整個 dataset 中你需要的幾個 columns
https://medium.com/@danielmiller5791/using-airflow-and-spark-to-crunch-us-immigration-data-aa7c95b37031
和關聯資料庫裡的 exists 不同, 下列範例是傳回不包含欄位 id 的所有記錄
> db.customers.find({id:{$exists:false}})
why-sql-beating-nosql-what-this-means-for-future-of-data-time-series-database
from pymongo import MongoClient
import requests

# Database / collection names used by the examples below; they match the
# attribute-style access (client.nobel_prize, db.winners) in the quick-start.
DB_NOBEL_PRIZE = 'nobel_prize'
COLL_WINNERS = 'winners'

# Quick start: connect with the client defaults and pick a collection
# using attribute access.
client = MongoClient()
db = client.nobel_prize
coll = db.winners


def get_mongo_database(db_name, host='localhost', port=27017,
                       username=None, password=None):
    """Return a handle to the database named *db_name*.

    Args:
        db_name: Name of the database to return. When credentials are given
            it is also used as the authentication database.
        host: Server host name passed to ``MongoClient``.
        port: Server port passed to ``MongoClient``.
        username: Optional user name; used only when *password* is also set.
        password: Optional password; used only when *username* is also set.

    Returns:
        The ``conn[db_name]`` database object of a new ``MongoClient``.
    """
    if username and password:
        # Keyword credentials (instead of a hand-built mongodb:// URI) keep
        # the requested port and avoid having to percent-escape special
        # characters in the user name or password.
        conn = MongoClient(host, port, username=username, password=password,
                           authSource=db_name)
    else:
        conn = MongoClient(host, port)
    return conn[db_name]


db = get_mongo_database(DB_NOBEL_PRIZE)
coll = db[COLL_WINNERS]

nobel_winners = [
    {'category': 'Physics', 'name': 'Albert Einstein',
     'nationality': 'Swiss', 'sex': 'male', 'year': 1921},
]
# Collection.insert() was removed in PyMongo 4; insert_many() takes the list.
coll.insert_many(nobel_winners)

res = coll.find({'category': 'Physics'})
list(res)

res = coll.find({'$or': [{'year': {'$gt': 1930}}, {'sex': 'female'}]})
list(res)


def mongo_coll_to_dicts(dbname='test', collname='test', query=None,
                        del_id=True, **kw):
    """Return the documents of a collection as a list of dicts.

    Args:
        dbname: Database name, forwarded to ``get_mongo_database``.
        collname: Collection name inside that database.
        query: Filter passed to ``find``; ``None`` means "match everything".
        del_id: When true (the default), drop the ``_id`` key from every
            returned document.
        **kw: Extra keyword arguments forwarded to ``get_mongo_database``
            (host, port, username, password).

    Returns:
        A list with one dict per matching document.
    """
    db = get_mongo_database(dbname, **kw)
    # A fresh dict per call avoids sharing one mutable default between calls.
    res = list(db[collname].find({} if query is None else query))
    if del_id:
        for r in res:
            r.pop('_id', None)
    return res


REST_EU_ROOT_URL = "http://restcountries.eu/rest/v1"


def REST_country_request(field='all', name=None, params=None):
    """Send a GET request to the REST countries service.

    Args:
        field: ``'all'`` to request ``<root>/all``; any other value is used
            as a path segment together with *name*.
        name: Value appended after *field* in the URL; ignored for ``'all'``.
        params: Optional dict of query-string parameters.

    Returns:
        The ``requests`` response object.

    Raises:
        Exception: If the response status code is not 200.
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    if not params:
        params = {}
    if field == 'all':
        url = REST_EU_ROOT_URL + '/all'
    else:
        # No trailing space after the last segment: it would end up in the
        # requested URL.
        url = '%s/%s/%s' % (REST_EU_ROOT_URL, field, name)
    print('Requesting URL: ' + url)
    # Both paths share the same headers and status check.
    response = requests.get(url, params=params, headers=headers)
    if response.status_code != 200:
        raise Exception('Request failed with status code '
                        + str(response.status_code))
    return response


response = REST_country_request('currency', 'usd')
response.json()

db_nobel = get_mongo_database('nobel_prize')
col = db_nobel['country_data']

response = REST_country_request()
# NOTE(review): assumes the /all endpoint returns a JSON array of country
# documents, as insert_many() requires a non-empty list - confirm.
col.insert_many(response.json())

res = col.find({'currencies': {'$in': ['USD']}})
list(res)