python操作elasticsearch

1950阅读 0评论2018-11-02 chengxuyonghu
分类:系统运维

from elasticsearch import Elasticsearch
# scan() is the scroll helper that ships with the low-level client; the
# size / preserve_order / query keywords used further down are its
# parameters. elasticsearch_dsl does not export a top-level `scan`.
from elasticsearch.helpers import scan
from elasticsearch_dsl import Search, Q

# Client created with the library's default connection settings.
es = Elasticsearch()


# 1. Write the query body the same way it would be written in Kibana.
q = {"query": {"match": {"name": "easy"}}}
# The client method is the lowercase search(); `es.Search` does not exist
# on the client and raised AttributeError.
res = es.search(body=q)
# The result is the same JSON response that Kibana shows for this query.


# 2. Build the query with elasticsearch_dsl's Search object.
s = Search().using(es).query("match", name="easy")
res = s.execute()
# Examples of other query objects offered by the DSL:
# MultiMatch(query='python django', fields=['title', 'body'])
# Match(title={"query": "web framework", "type": "phrase"})




# 3. Build the query with Q objects (avoids deeply nested dicts).
q = Q("match", name="easy")
s = Search().using(es).query(q)
res = s.execute(ignore_cache=True)
# ignore_cache=True bypasses the response cached on the Search object;
# without it, execute() can hand back the earlier result even after the
# query has been changed.
# s.to_dict() returns the request body, which is handy for verifying the
# query that will be sent.
# Assigning s.query = Q("match", name='haha') replaces the query.
# Responses from the DSL expose attribute-style access, e.g. res.hits[0].
# NOTE(review): the original note cites res.hits[0].id; document metadata
# is normally reached through .meta (res.hits[0].meta.id) -- confirm.


# 4. Fetch every matching document with the scan helper.
q = {"query": {"match": {"name": "easy"}}}
res = scan(es, size=100, preserve_order=True, query=q)
# Observed by the original author: changing size made no visible difference
# (all hits are always returned), and passing preserve_order=False or
# leaving it out raised an error.
# NOTE(review): size is the batch size of each scroll request rather than a
# cap on the total number of hits, which would explain the first
# observation. The preserve_order error is presumably a client/server
# version mismatch -- confirm against the installed versions.
doc = list(res)  # drain the iterator to collect every hit










import traceback
from pymongo import MongoClient
from elasticsearch import Elasticsearch


# Connect to the local MongoDB server and select the 'blog' database.
_mongo_client = MongoClient('mongodb://127.0.0.1:27017')
_db = _mongo_client['blog']


# Connect to Elasticsearch using the client's default settings.
_es = Elasticsearch()


# Initial mappings for the index: one entry per document type, each listing
# its fields and their Elasticsearch field types.
# NOTE(review): declaring two mapping types in a single index is only
# accepted by older Elasticsearch releases -- confirm the target version.
_user_properties = {
  "title": {"type": "text"},
  "name": {"type": "text"},
  "age": {"type": "integer"},
}
_blogpost_properties = {
  "title": {"type": "text"},
  "body": {"type": "text"},
  "user_id": {"type": "keyword"},
  "created": {"type": "date"},
}
_index_mappings = {
  "mappings": {
    "user": {"properties": _user_properties},
    "blogpost": {"properties": _blogpost_properties},
  },
}


# Create 'blog_index' with the mappings only when it does not exist yet.
# Truthiness is tested instead of `is not True` so that any truthy "exists"
# result counts, not only the literal True singleton.
if not _es.indices.exists(index='blog_index'):
  _es.indices.create(index='blog_index', body=_index_mappings)


# Load every user document from MongoDB. Elasticsearch generates its own
# _id when indexing, so MongoDB's _id is excluded through the projection.
# The database handle is named _db; the bare name `db` used here before
# was never defined and raised NameError.
user_cursor = _db.user.find({}, projection={'_id': False})
user_docs = list(user_cursor)


# Number of documents indexed successfully so far.
processed = 0
# Add each document fetched from MongoDB to Elasticsearch.
for _doc in user_docs:
  try:
    # refresh=True makes the new document searchable immediately; with the
    # default (false) the searches below may come back empty.
    _es.index(index='blog_index', doc_type='user', refresh=True, body=_doc)
  except Exception:
    # Best effort: report the failure and carry on with the next document.
    # Exception (rather than a bare except) lets KeyboardInterrupt and
    # SystemExit still stop the script.
    traceback.print_exc()
  else:
    processed += 1
    print('Processed: ' + str(processed), flush=True)


# Fetch all user documents with a match_all query and dump the raw response.
_query_all = {'query': {'match_all': {}}}
print('Search all...', flush=True)
_searched = _es.search(index='blog_index', doc_type='user', body=_query_all)
print(_searched, flush=True)


# Print the stored document (_source) of every hit in the response.
_hits = _searched['hits']['hits']
for hit in _hits:
  print(hit['_source'], flush=True)


# Search for user documents whose name field matches "jerry" and dump the
# raw response.
_query_name_contains = {'query': {'match': {'name': 'jerry'}}}
print('Search name contains jerry.', flush=True)
_searched = _es.search(
  index='blog_index',
  doc_type='user',
  body=_query_name_contains,
)
print(_searched, flush=True)




















#coding=utf-8
from datetime import datetime
from elasticsearch import Elasticsearch


# Connect to Elasticsearch; the default port is 9200.
es = Elasticsearch()


# Create an index named my-index. If it already exists the server answers
# with a 400 (presumably what the `ignore` argument is meant to tolerate,
# i.e. ignore=400 -- confirm against the client documentation).
# The index can be created up front like this, or implicitly later when the
# first document is inserted.
# es.indices.create(index='my-index', ignore=400)
#es.indices.create(index='my-index', )


# Alternatively, let the index "my-index" be created on insert; the calls
# below insert 12 documents one at a time.
# es.index(index="my-index", doc_type="my-type", id=1, body={"name": "xiaoming", "age": 18})
# es.index(index="my-index", doc_type="my-type", id=2, body={"name": "daming", "age": 18})
# es.index(index="my-index", doc_type="my-type", id=3, body={"name": "xiaoxue", "age": 19})
# es.index(index="my-index", doc_type="my-type", id=4, body={"name": "daxue", "age": 19})
# es.index(index="my-index", doc_type="my-type", id=5, body={"name": "xiaojun", "age": 20})
# es.index(index="my-index", doc_type="my-type", id=6, body={"name": "dajun", "age": 20})
# es.index(index="my-index", doc_type="my-type", id=7, body={"name": "xiaohua", "age": 21})
# es.index(index="my-index", doc_type="my-type", id=8, body={"name": "dahua", "age": 21})
# es.index(index="my-index", doc_type="my-type", id=9, body={"name": "xiaozhang", "age": 22})
# es.index(index="my-index", doc_type="my-type", id=10, body={"name": "dazhang", "age": 22})
# es.index(index="my-index", doc_type="my-type", id=11, body={"name": "xiaomei", "age": 23})
# es.index(index="my-index", doc_type="my-type", id=12, body={"name": "damei", "age": 23})


# Delete a document by id.
# es.delete(index="my-index", doc_type="my-type", id=1)


# Fetch every document whose age is 18 with a match query.
# res = es.search(index="my-index", body={'query': {'match': {'age':18}}})


# Fetch every document in the index with a match_all query.
res = es.search(index="my-index", body={'query': {'match_all': {}}})
# print() with a single argument produces the same output on Python 2 and
# Python 3; the former print statements were a SyntaxError on Python 3.
print(res)
print('*' * 100)


# Two ways to read data back: get and search.
# get: fetch a single document by id.
# res = es.get(index="my-index", doc_type="my-type", id=1)
# print('res_001 = ', res)
print('*' * 100)


# search: query an index.
# res = es.search(index="test-index", body={"query": {"match_all": {}}})
print('*' * 100)




# CURL的操作


# ES查询


# curl是利用URL语法在命令行方式下工作的开源文件传输工具,使用curl可以简单实现常见的get/post请求。简单的认为是可以在命令行下面访问url的一个工具。
# curl
#     -X 指定http的请求方法 有HEAD GET POST PUT DELETE
#     -d 指定要传输的数据
#     -H 指定http请求头信息


# elasticsearch rest api遵循的格式为:
# curl -X<REST Verb> <Node>:<Port>/<Index>/<Type>/<ID>


# 检查es版本信息
# curl IP:9200


# 查看集群是否健康
# curl 'IP:9200/_cat/health?v'




# 查看节点列表
# curl 'IP:9200/_cat/nodes?v'


# 列出所有索引及存储大小
# curl 'IP:9200/_cat/indices?v'




# 创建索引
# 创建索引名为XX,默认会有5个分片,1个副本
# curl -XPUT 'IP:9200/XX?pretty'
# 创建后查看,列出所有索引及存储大小
# curl 'IP:9200/_cat/indices?v'






# 示例 es.index(index="my-index", doc_type="my-type", id=1, body={"name": "xiaoming", "age": 18})数据格式如此所示,由以上文中python代码插入的数据
# 01、显示es的版本等信息
# curl -XGET 'IP:9200'


# 02、curl命令查询某个索引库index下的所有数据(在url后面加上一个pretty则会对返回结果进行格式化)
# curl -XGET /索引/_search?pretty
# curl -XGET /my-index/_search?pretty


# 03、curl命令查询某个type下的所有数据(在url后面加上一个pretty则会对返回结果进行格式化)
# curl -XGET /索引/类型/_search?pretty
# curl -XGET /my-index/my-type/_search?pretty


#
# 04、根据id查询具体的一条记录:
# curl -XGET /my-index/my-type/1?pretty
# 输出
# {
#   "_index" : "my-index",
#   "_type" : "my-type",
#   "_id" : "1",
#   "_version" : 2,
#   "found" : true,
#   "_source" : {
#     "age" : 18,
#     "name" : "xiaoming"
#   }
# }


# 05、查询一条索引文档中的具体的字段:
# curl -XGET /my-index/my-type/1?_source=name
# 输出
# {"_index":"my-index","_type":"my-type","_id":"1","_version":2,"found":true,"_source":{"name":"xiaoming"}}


# 06、如果要查询多个字段,使用","进行隔开
# curl -XGET /my-index/my-type/1?_source=name,age
# 输出
# {"_index":"my-index","_type":"my-type","_id":"1","_version":2,"found":true,"_source":{"name":"xiaoming","age":18}}


# 07、获取source所有数据
# curl -XGET /my-index/my-type/1?_source
# 输出
# {"_index":"my-index","_type":"my-type","_id":"1","_version":2,"found":true,"_source":{"name":"xiaoming","age":18}}


# 08、根据条件进行查询name是xiaoming的
# curl -XGET /my-index/my-type/_search?q=name:xiaoming
# 输出
# {"took":14,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":1,"max_score":0.2876821,"hits":[{"_index":"my-index","_type":"my-type","_id":"1","_score":0.2876821,"_source":{"age":18,"name":"xiaoming"}}]}}


# 09、根据条件进行查询name是xiaoming和name是xiaoxue的,xiaoming和xiaoxue之间用","隔开
# curl -XGET /my-index/my-type/_search?q=name:xiaoming,xiaoxue
# 输出
# {"took":134,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":2,"max_score":0.9808292,"hits":[{"_index":"my-index","_type":"my-type","_id":"2","_score":0.9808292,"_source":{"age":19,"name":"xiaoxue"}},{"_index":"my-index","_type":"my-type","_id":"1","_score":0.2876821,"_source":{"age":18,"name":"xiaoming"}}]}}


# ES更新
########################################### 报406错误的原因及解决方法 #######################################################
# 原因:Elasticsearch 6.0起对带请求体的请求强制检查Content-Type,curl -d 默认发送的
# application/x-www-form-urlencoded 不被接受,因此返回406;加上 -H 'Content-Type: application/json' 即可。
# ES使用PUT或者POST对文档进行更新,如果指定ID的文档已经存在,则执行更新操作
# 注意:执行更新操作的时候,ES首先将旧的文档标记为删除状态,然后添加新的文档,旧的文档不会立即消失,但是你也无法访问,ES会在继续添加更多数据的时候在后台清理已经标记为删除状态的文档。
# 局部更新
# 可以添加新字段或者更新已经存在字段(必须使用POST)
# curl -XPOST /my-index/my-type/1/_update -H 'Content-Type: application/json' -d '{"doc":{"name": "xuexue", "age": "11"}}'
########################################### 报406错误的原因及解决方法 #######################################################


# 10、使用文件的方式
# curl -XPOST/PUT http://master:9200/index/type/_bulk --data-binary @path
# 比如
# curl -XPOST 'http://master:9200/index/type/_bulk' --data-binary @/home/uplooking/Documents/accounts.json
--------------------- 


上一篇:linux配置虚拟IP--VIP
下一篇:python读取文本内容排序