//
xiaoaoaode
Published on 2020-08-10 / 157 Visits
0

_IO_FILE结构与fread函数知识点小结

本文简单介绍一些有关_IO_FILE结构与fread函数的知识点。

首先需要下载一个glibc的源代码,我这里看的是glibc-2.23的源代码。

(glibc的源代码可以从gnu的官网下载,如果嫌速度太慢,可以找国内的镜像站下载,这里我推荐从清华大学开源软件镜像站的相关位置下载)

(博主还是菜鸟,有些知识可能理解不够透彻,有些表述可能不够严谨,欢迎大家指正,望大家多多包涵)

_IO_FILE结构

_IO_FILE在源码的.\libio\libio.h这个位置

源码:

/* Per-stream control block, quoted from libio/libio.h.
   NOTE(review): this listing stops at the `#ifdef _IO_USE_OLD_IO_FILE`
   line, so that conditional is not closed within the excerpt.  */
struct _IO_FILE {
  int _flags;		/* High-order word is _IO_MAGIC; rest is flags. */
#define _IO_file_flags _flags

  /* The following pointers correspond to the C++ streambuf protocol. */
  /* Note:  Tk uses the _IO_read_ptr and _IO_read_end fields directly. */
  char* _IO_read_ptr;	/* Current read pointer */
  char* _IO_read_end;	/* End of get area. */
  char* _IO_read_base;	/* Start of putback+get area. */
  char* _IO_write_base;	/* Start of put area. */
  char* _IO_write_ptr;	/* Current put pointer. */
  char* _IO_write_end;	/* End of put area. */
  char* _IO_buf_base;	/* Start of reserve area. */
  char* _IO_buf_end;	/* End of reserve area. */
  /* The following fields are used to support backing up and undo. */
  char *_IO_save_base; /* Pointer to start of non-current get area. */
  char *_IO_backup_base;  /* Pointer to first valid character of backup area */
  char *_IO_save_end; /* Pointer to end of non-current get area. */

  struct _IO_marker *_markers;

  /* Link to another _IO_FILE; presumably the next stream in a list of
     open streams -- confirm against the code that maintains it.  */
  struct _IO_FILE *_chain;

  /* Presumably the underlying file descriptor -- confirm.  */
  int _fileno;
  /* The `#if 0` branch is compiled out, so the field here is _flags2.  */
#if 0
  int _blksize;
#else
  int _flags2;
#endif
  _IO_off_t _old_offset; /* This used to be _offset but it's too small.  */

#define __HAVE_COLUMN /* temporary */
  /* 1+column number of pbase(); 0 is unknown. */
  unsigned short _cur_column;
  signed char _vtable_offset;
  char _shortbuf[1];

  /*  char* _save_gptr;  char* _save_egptr; */

  _IO_lock_t *_lock;
#ifdef _IO_USE_OLD_IO_FILE
};

这里_flags是某种标记,貌似每个不同时候使用fopen打开的文件所对应的值都不一样,不过在我调试的时候发现这个变量有一部分是不变的。例如:我调试的某个程序中_IO_2_1_stdin_所对应的_flags为0x00000000fbad2088,其中fbad那部分在其他文件流的_IO_FILE结构里也是一样的,这应该就是上面源码注释里说的_IO_MAGIC(注释写明高位字是_IO_MAGIC,其余位才是标志)。

之后的一堆char指针是用来定位具体的读,写的情况的,这个在这里就不多讲了需要配合fwrite,fread等函数的源码才能讲清楚。

_chain变量是指向下一个_IO_FILE结构的,如果你只自行使用fopen打开一个文件,那么这个文件的_chain将指向_IO_2_1_stderr_,然后_IO_2_1_stderr_的_chain部分指向_IO_2_1_stdout_,而且文件描述符_fileno也是逐级递减的,所以_chain可以看作使用头插法添加节点的链表。

这后面还有别的变量,这里我也不太熟悉,不做介绍了。

fread函数简单介绍

fread的源码在glibc源码根目录的.\libio\iofread.c位置。

源码:

/* Read up to COUNT items of SIZE bytes each from stream FP into BUF.
   Returns 0 when SIZE * COUNT is 0.  Otherwise returns COUNT when every
   requested byte was read, or the number of whole items read
   (bytes_read / size) when the read came up short.  */
_IO_size_t
_IO_fread (void *buf, _IO_size_t size, _IO_size_t count, _IO_FILE *fp)
{
  /* NOTE(review): the product is not checked for overflow here.  */
  _IO_size_t bytes_requested = size * count;
  _IO_size_t bytes_read;
  /* Second argument is the value CHECK_FILE may return on a bad stream.  */
  CHECK_FILE (fp, 0);
  if (bytes_requested == 0)
    return 0;
  /* Presumably these two macros take and drop the stream lock around the
     read -- their definitions are not shown here.  */
  _IO_acquire_lock (fp);
  bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);
  _IO_release_lock (fp);
  /* Translate the byte count back into an item count.  */
  return bytes_requested == bytes_read ? count : bytes_read / size;
}

fread的参数中,buf指读取到的数据要存放的地方,size是每个元素的大小,count是元素的数量,fp是_IO_FILE结构的地址。

bytes_requested是读取的总字节数,为0时返回0,CHECK_FILE是一个宏,源码如下:

/* CHECK_FILE (FILE, RET): validate a stream argument.
   With IO_DEBUG defined, the enclosing function returns RET (after
   MAYBE_SET_EINVAL) when FILE is NULL, or when its _IO_file_flags masked
   with _IO_MAGIC_MASK differ from _IO_MAGIC; otherwise only
   COERCE_FILE (FILE) runs.  Without IO_DEBUG the macro expands to
   COERCE_FILE (FILE) alone and RET is unused.  */
#ifdef IO_DEBUG
# define CHECK_FILE(FILE, RET) \
	if ((FILE) == NULL) { MAYBE_SET_EINVAL; return RET; } \
	else { COERCE_FILE(FILE); \
	       if (((FILE)->_IO_file_flags & _IO_MAGIC_MASK) != _IO_MAGIC) \
	  { MAYBE_SET_EINVAL; return RET; }}
#else
# define CHECK_FILE(FILE, RET) COERCE_FILE (FILE)
#endif

在定义了IO_DEBUG的情况下,大致是检查fp是不是空,以及_IO_file_flags中的魔数部分是不是被修改过;没有定义IO_DEBUG时,这个宏只会展开为COERCE_FILE (FILE),不做上述检查。

然后最关键的是_IO_sgetn函数,这个函数是fread中真正起作用的函数。

_IO_sgetn本身(在.\libio\genops.c中)只是通过_IO_XSGETN经由vtable做一次跳转,对普通文件流来说实际执行的是_IO_file_xsgetn,它在glibc源码根目录的.\libio\fileops.c位置

源码:(这里我就直接把我的理解以注释的形式写进源码中了)

/* Copy up to N bytes from stream FP into DATA.
   Returns the number of bytes delivered, i.e. N minus whatever was still
   wanted when the loop ended.  The result is short when __underflow
   returns EOF or when _IO_SYSREAD returns a value <= 0.  */
_IO_size_t
_IO_file_xsgetn (_IO_FILE *fp, void *data, _IO_size_t n)
{
  _IO_size_t want, have;
  _IO_ssize_t count;
  char *s = data;

  want = n;

  /* No stream buffer has been set up yet.  */
  if (fp->_IO_buf_base == NULL)
    {
      /* Maybe we already have a push back pointer.  */
      if (fp->_IO_save_base != NULL)
	{
	  free (fp->_IO_save_base);
	  fp->_flags &= ~_IO_IN_BACKUP;
	}
      /* Runs whenever _IO_buf_base was NULL, whether or not a save area
	 was freed above; presumably allocates the stream buffer.  */
      _IO_doallocbuf (fp);
    }

  /* Keep going until the request is satisfied or a break below ends the
     loop early.  */
  while (want > 0)
    {
      /* Bytes currently available between the read pointer and the end of
	 the get area.  */
      have = fp->_IO_read_end - fp->_IO_read_ptr;
      /* Enough is already buffered: copy it out and finish.  */
      if (want <= have)
	{
	  memcpy (s, fp->_IO_read_ptr, want);
	  fp->_IO_read_ptr += want;
	  want = 0;
	}
      else
	{
	  /* Hand over whatever is buffered before fetching more.  */
	  if (have > 0)
	    {
#ifdef _LIBC
	      s = __mempcpy (s, fp->_IO_read_ptr, have);
#else
	      memcpy (s, fp->_IO_read_ptr, have);
	      s += have;
#endif
	      want -= have;
	      fp->_IO_read_ptr += have;
	    }

	  /* Check for backup and repeat */
	  if (_IO_in_backup (fp))
	    {
	      _IO_switch_to_main_get_area (fp);
	      continue;
	    }

	  /* If we now want less than a buffer, underflow and repeat
	     the copy.  Otherwise, _IO_SYSREAD directly to
	     the user buffer. */
	  if (fp->_IO_buf_base
	      && want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base))
	    {
	      /* Presumably refills the stream buffer; an EOF result ends
		 the loop with a short count.  */
	      if (__underflow (fp) == EOF)
		break;

	      continue;
	    }

	  /* These must be set before the sysread as we might longjmp out
	     waiting for input. */
	  _IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
	  _IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);

	  /* Try to maintain alignment: read a whole number of blocks.  */
	  count = want;
	  if (fp->_IO_buf_base)
	    {
	      _IO_size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
	      /* Round the request down to a multiple of the buffer size,
		 but only when that size is at least 128.  */
	      if (block_size >= 128)
		count -= want % block_size;
	    }

	  /* Read straight into the caller's memory at s, not into the
	     stream buffer.  */
	  count = _IO_SYSREAD (fp, s, count);
	  if (count <= 0)
	    {
	      /* Zero marks end of file; a negative value marks an error.  */
	      if (count == 0)
		fp->_flags |= _IO_EOF_SEEN;
	      else
		fp->_flags |= _IO_ERR_SEEN;

	      break;
	    }

	  s += count;
	  want -= count;
	  if (fp->_offset != _IO_pos_BAD)
	    _IO_pos_adjust (fp->_offset, count);
	}
    }

  /* Bytes actually delivered to the caller.  */
  return n - want;
}