在Thrift中以GBK传输中文字符及分词服务的搭建
项目中需要将分词做成线上服务的形式:服务后端用C++实现,客户端用Java实现,并由客户端调用分词服务。分词程序默认以GBK编码为准,而Thrift的Java库在读写字符串时固定使用UTF-8编码,因此需要在数据传输时改用GBK编码。Thrift的Java库不支持以GBK方式传输字符串,而且预期不会增加此功能(Support non-UTF-8 in Java and C#),原因参考Support non-UTF-8 in Java;C/C++中传输的字符串是字节序列,不存在编码的问题。通过阅读TProtocol相关代码可知,只需要重写readString和writeString中的字符串读写编码方式即可,重写的类的完整实现如下:[java]import java.io.UnsupportedEncodingException;import java.nio.ByteBuffer;import org.apache.thrift.ShortStack;import org.apache.thrift.TException;import org.apache.thrift.protocol.*;import org.apache.thrift.transport.TTransport;public class GBKCompactProtocol extends TProtocol {private static final TStruct ANONYMOUS_STRUCT = new TStruct("");private static final TField TSTOP = new TField("", (byte) 0, (short) 0);private static final byte[] ttypeToCompactType = new byte[16];private static final byte PROTOCOL_ID = -126;private static final byte VERSION = 1;private static final byte VERSION_MASK = 31;private static final byte TYPE_MASK = -32;private static final int TYPE_SHIFT_AMOUNT = 5;private ShortStack lastField_ = new ShortStack(15);private short lastFieldId_ = 0;private TField booleanField_ = null;private Boolean boolValue_ = null;byte[] i32buf = new byte[5];byte[] varint64out = new byte[10];private byte[] byteDirectBuffer = new byte[1];byte[] byteRawBuf = new byte[1];public GBKCompactProtocol(TTransport transport) {super(transport);}public void reset() {this.lastField_.clear();this.lastFieldId_ = 0;}public void writeMessageBegin(TMessage message)throws TException {writeByteDirect((byte) -126);writeByteDirect(0x1 | message.type << 5 & 0xFFFFFFE0);writeVarint32(message.seqid);writeString(message.name);}public void writeStructBegin(TStruct struct)throws TException {this.lastField_.push(this.lastFieldId_);this.lastFieldId_ = 0;}public void writeStructEnd()throws TException {this.lastFieldId_ = this.lastField_.pop();}public void writeFieldBegin(TField field)throws TException {if (field.type == 2) {this.booleanField_ = field;} else 
writeFieldBeginInternal(field, (byte) -1);}private void writeFieldBeginInternal(TField field, byte typeOverride)throws TException {byte typeToWrite = typeOverride == -1 ? getCompactType(field.type) : typeOverride;if ((field.id > this.lastFieldId_) && (field.id - this.lastFieldId_ <= 15)) {writeByteDirect(field.id - this.lastFieldId_ << 4 | typeToWrite);} else {writeByteDirect(typeToWrite);writeI16(field.id);}this.lastFieldId_ = field.id;}public void writeFieldStop()throws TException {writeByteDirect((byte) 0);}public void writeMapBegin(TMap map)throws TException {if (map.size == 0) {writeByteDirect(0);} else {writeVarint32(map.size);writeByteDirect(getCompactType(map.keyType) << 4 | getCompactType(map.valueType));}}public void writeListBegin(TList list)throws TException {writeCollectionBegin(list.elemType, list.size);}public void writeSetBegin(TSet set)throws TException {writeCollectionBegin(set.elemType, set.size);}public void writeBool(boolean b)throws TException {if (this.booleanField_ != null) {writeFieldBeginInternal(this.booleanField_, (byte) (b ? 1 : 2));this.booleanField_ = null;} else {writeByteDirect((byte) (b ? 1 : 2));}}public void writeByte(byte b)t补充:软件开发 , Java ,
上一个:java.lang.RuntimeException: Invalid action class configuration that references an unknown class name
下一个:Firefox和IE兼容性问题及解决方法
- 更多JAVA疑问解答:
- java怎么在线读取ftp服务器上的文件内容
- 关于程序员的职业规划
- HTML和JSP矛盾吗?
- java小程序如何打包?
- java怎么split路径文件名?
- jsp+javaBean中Column 'ordersPrice' specified twice的错误
- Java TCP/IP Socket网络编程系列
- 大家来讨论一下我到底该用什么好?Swing 还是 JavaFX
- 关于Hibernate实体自身多对一的抓取问题
- 关于apache2+tomcat群集出现的问题
- spring 获取上下文问题
- SSH 导入导出excel 谁有这块的资料吗?
- Ext TreePanel 刷新问题
- springmvc 加载一个jsp页面执行多个方法 报404
- checkbox数组action怎么向页面传值