4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <asm/div64.h>
39 #include "cifsproto.h"
40 #include "cifs_unicode.h"
41 #include "cifs_debug.h"
42 #include "cifs_fs_sb.h"
/*
 * cifs_convert_flags - translate the POSIX access mode in @flags into a
 * CIFS desired-access mask.
 *
 * (flags & O_ACCMODE) is compared against O_RDONLY, O_WRONLY and O_RDWR in
 * turn.  The visible O_RDWR branch returns GENERIC_READ | GENERIC_WRITE
 * rather than GENERIC_ALL.  When no branch returns, the function falls back
 * to an explicit OR of control, attribute, EA and data access rights.
 *
 * NOTE(review): the embedded original line numbers skip (45, 47, 49, 55-56,
 * 59-61), so the read-only/write-only return statements, the braces and the
 * tail of the fallback mask are presumably missing from this listing -
 * confirm against the original file.
 */
44 static inline int cifs_convert_flags(unsigned int flags)
46 if ((flags & O_ACCMODE) == O_RDONLY)
48 else if ((flags & O_ACCMODE) == O_WRONLY)
50 else if ((flags & O_ACCMODE) == O_RDWR) {
51 /* GENERIC_ALL is too much permission to request
52 can cause unnecessary access denied on create */
53 /* return GENERIC_ALL; */
54 return (GENERIC_READ | GENERIC_WRITE);
57 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
58 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * cifs_posix_convert_flags - build the flag word used for a POSIX-extension
 * open from the VFS open flags.
 *
 * The access mode (flags & O_ACCMODE) becomes FMODE_READ, FMODE_WRITE or
 * both.  O_APPEND, O_DSYNC, __O_SYNC, O_DIRECTORY, O_NOFOLLOW and O_DIRECT
 * are then ORed in, each cast to fmode_t.  O_CREAT, O_EXCL and O_TRUNC are
 * intentionally not carried over (see the comment in the body).
 *
 * NOTE(review): raw O_* bits are cast into an fmode_t, so two different flag
 * namespaces share one word; callers in this file cast the result back to
 * int and hand it to cifs_posix_open() - confirm that this encoding is what
 * that function expects.
 * NOTE(review): the embedded line numbers skip (63, 65, 75, 78, 80, 82, 88,
 * 90-93), so the guards for O_APPEND/O_DSYNC/__O_SYNC/O_DIRECT and the final
 * return are presumably missing from this listing.
 */
62 static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
64 fmode_t posix_flags = 0;
66 if ((flags & O_ACCMODE) == O_RDONLY)
67 posix_flags = FMODE_READ;
68 else if ((flags & O_ACCMODE) == O_WRONLY)
69 posix_flags = FMODE_WRITE;
70 else if ((flags & O_ACCMODE) == O_RDWR) {
71 /* GENERIC_ALL is too much permission to request
72 can cause unnecessary access denied on create */
73 /* return GENERIC_ALL; */
74 posix_flags = FMODE_READ | FMODE_WRITE;
76 /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
77 reopening a file. They had their effect on the original open */
79 posix_flags |= (fmode_t)O_APPEND;
81 posix_flags |= (fmode_t)O_DSYNC;
83 posix_flags |= (fmode_t)__O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= (fmode_t)O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= (fmode_t)O_NOFOLLOW;
89 posix_flags |= (fmode_t)O_DIRECT;
/*
 * cifs_get_disposition - choose the CIFS create disposition for an open.
 *
 * The tests run from most to least specific: O_CREAT|O_EXCL first, then
 * O_CREAT|O_TRUNC (FILE_OVERWRITE_IF), then O_CREAT alone, then O_TRUNC
 * alone (FILE_OVERWRITE).  The mapping table in the comment inside
 * cifs_open() lists FILE_CREATE for O_CREAT|O_EXCL, FILE_OPEN_IF for O_CREAT
 * and FILE_OPEN when none of these flags is set.
 *
 * NOTE(review): the embedded line numbers skip (95, 97, 101, 104-107), so
 * those three return statements are presumably missing from this listing.
 */
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
/*
 * cifs_posix_open_inode_helper - record the oplock state granted by a
 * successful POSIX open on the CIFS inode.
 *
 * The whole body runs with GlobalSMBSeslock held for writing.  The
 * pCifsInode argument is overwritten with CIFS_I() of the file's dentry
 * inode; if that is NULL the lock is released (the error return itself is
 * not visible in this listing).  If the inode may already cache reads, the
 * function jumps straight to the oplock update.  The mtime/size comparison
 * and cache invalidation that would otherwise run here is commented out in
 * the source.  At psx_client_can_cache the low four bits of @oplock select
 * the caching state: OPLOCK_EXCLUSIVE sets clientCanCacheAll and
 * clientCanCacheRead, OPLOCK_READ sets only clientCanCacheRead.
 *
 * NOTE(review): the embedded line numbers skip, so the remaining parameters,
 * local declarations, braces and return statements are presumably missing
 * from this listing - confirm against the original file.
 */
108 /* all arguments to this function must be checked for validity in caller */
110 cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
111 struct cifsInodeInfo *pCifsInode, __u32 oplock,
115 write_lock(&GlobalSMBSeslock);
117 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
118 if (pCifsInode == NULL) {
119 write_unlock(&GlobalSMBSeslock);
123 if (pCifsInode->clientCanCacheRead) {
124 /* we have the inode open somewhere else
125 no need to discard cache data */
126 goto psx_client_can_cache;
129 /* BB FIXME need to fix this check to move it earlier into posix_open
130 BB fIX following section BB FIXME */
132 /* if not oplocked, invalidate inode pages if mtime or file
134 /* temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
135 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
136 (file->f_path.dentry->d_inode->i_size ==
137 (loff_t)le64_to_cpu(buf->EndOfFile))) {
138 cFYI(1, "inode unchanged on server");
140 if (file->f_path.dentry->d_inode->i_mapping) {
141 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
143 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
145 cFYI(1, "invalidating remote inode since open detected it "
147 invalidate_remote_inode(file->f_path.dentry->d_inode);
150 psx_client_can_cache:
151 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
152 pCifsInode->clientCanCacheAll = true;
153 pCifsInode->clientCanCacheRead = true;
154 cFYI(1, "Exclusive Oplock granted on inode %p",
155 file->f_path.dentry->d_inode);
156 } else if ((oplock & 0xF) == OPLOCK_READ)
157 pCifsInode->clientCanCacheRead = true;
159 /* will have to change the unlock if we reenable the
160 filemap_fdatawrite (which does not seem necessary */
161 write_unlock(&GlobalSMBSeslock);
/*
 * cifs_open_inode_helper - reconcile cached inode data with what the server
 * returned from a (non-POSIX) open and record the granted oplock.
 *
 * If the inode may already cache reads, cache validation is skipped via
 * "goto client_can_cache" (the label line is not visible in this listing).
 * Otherwise the server's LastWriteTime and EndOfFile from @buf are compared
 * with the inode's mtime and size.  When they match, only a debug message is
 * logged.  On the other path (its "else" line is not visible here) dirty
 * pages are written with filemap_write_and_wait(), the result is stored in
 * write_behind_rc and invalidate_remote_inode() is called.
 *
 * The inode is then refreshed with cifs_get_inode_info_unix() or
 * cifs_get_inode_info(); the condition choosing between them is not visible
 * here - presumably pTcon->unix_ext, as used elsewhere in this file.
 * Finally the low four bits of *oplock set clientCanCacheAll and/or
 * clientCanCacheRead.
 *
 * NOTE(review): the embedded line numbers skip, so braces, some conditions
 * and the return are presumably missing from this listing.
 */
165 /* all arguments to this function must be checked for validity in caller */
166 static inline int cifs_open_inode_helper(struct inode *inode,
167 struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
168 char *full_path, int xid)
170 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
171 struct timespec temp;
174 if (pCifsInode->clientCanCacheRead) {
175 /* we have the inode open somewhere else
176 no need to discard cache data */
177 goto client_can_cache;
180 /* BB need same check in cifs_create too? */
181 /* if not oplocked, invalidate inode pages if mtime or file
183 temp = cifs_NTtimeToUnix(buf->LastWriteTime);
184 if (timespec_equal(&inode->i_mtime, &temp) &&
186 (loff_t)le64_to_cpu(buf->EndOfFile))) {
187 cFYI(1, "inode unchanged on server");
189 if (inode->i_mapping) {
190 /* BB no need to lock inode until after invalidate
191 since namei code should already have it locked? */
192 rc = filemap_write_and_wait(inode->i_mapping);
194 pCifsInode->write_behind_rc = rc;
196 cFYI(1, "invalidating remote inode since open detected it "
198 invalidate_remote_inode(inode);
203 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
206 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
209 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
210 pCifsInode->clientCanCacheAll = true;
211 pCifsInode->clientCanCacheRead = true;
212 cFYI(1, "Exclusive Oplock granted on inode %p", inode);
213 } else if ((*oplock & 0xF) == OPLOCK_READ)
214 pCifsInode->clientCanCacheRead = true;
/*
 * cifs_open - open a file on the server and attach a cifsFileInfo to @file.
 *
 * Flow, as far as it is visible in this listing:
 *  1. Resolve cifs_sb/tcon from the inode's superblock and build the full
 *     path from the dentry (a NULL path is checked for).
 *  2. POSIX open attempt - only when the tcon is not flagged
 *     broken_posix_open, has unix_ext, the session advertises CAP_UNIX and
 *     the server's unix capability word has CIFS_UNIX_POSIX_PATH_OPS_CAP.
 *     The flags come from cifs_posix_convert_flags() with SMB_O_CREAT ORed
 *     in.  On success cifs_posix_open_inode_helper() and cifs_new_fileinfo()
 *     are called; the handle is closed again with CIFSSMBClose() on the
 *     failure paths shown.  -EINVAL or -EOPNOTSUPP logs an error (when the
 *     server's NOS string is set) and sets tcon->broken_posix_open so later
 *     opens skip this attempt.
 *  3. Traditional open - desired access from cifs_convert_flags(),
 *     disposition from cifs_get_disposition(), a FILE_ALL_INFO buffer from
 *     kmalloc(), then CIFSSMBOpen() when the session has CAP_NT_SMBS,
 *     otherwise rc is set to -EIO so that SMBLegacyOpen() is tried.
 *  4. On success cifs_open_inode_helper() refreshes the inode and
 *     cifs_new_fileinfo() creates the per-open structure.
 *  5. If the server reports CIFS_CREATE_ACTION in oplock and unix_ext is
 *     set, the mode is applied afterwards with CIFSSMBUnixSetPathInfo().
 *
 * NOTE(review): SMB_O_CREAT is ORed into the POSIX open flags
 * unconditionally, even though this is an open rather than a create -
 * confirm this is intended.
 * NOTE(review): the embedded line numbers skip throughout, so local
 * declarations, braces, goto/return statements, the NULL check of buf and
 * the cleanup code (freeing buf and full_path) are presumably missing from
 * this listing - confirm against the original file.
 */
219 int cifs_open(struct inode *inode, struct file *file)
224 struct cifs_sb_info *cifs_sb;
225 struct cifsTconInfo *tcon;
226 struct cifsFileInfo *pCifsFile = NULL;
227 struct cifsInodeInfo *pCifsInode;
228 char *full_path = NULL;
232 FILE_ALL_INFO *buf = NULL;
236 cifs_sb = CIFS_SB(inode->i_sb);
237 tcon = cifs_sb->tcon;
239 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
241 full_path = build_path_from_dentry(file->f_path.dentry);
242 if (full_path == NULL) {
248 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
249 inode, file->f_flags, full_path);
256 if (!tcon->broken_posix_open && tcon->unix_ext &&
257 (tcon->ses->capabilities & CAP_UNIX) &&
258 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
259 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
260 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
261 oflags |= SMB_O_CREAT;
262 /* can not refresh inode info since size could be stale */
263 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
264 cifs_sb->mnt_file_mode /* ignored */,
265 oflags, &oplock, &netfid, xid);
267 cFYI(1, "posix open succeeded");
268 /* no need for special case handling of setting mode
269 on read only files needed here */
271 rc = cifs_posix_open_inode_helper(inode, file,
272 pCifsInode, oplock, netfid);
274 CIFSSMBClose(xid, tcon, netfid);
278 pCifsFile = cifs_new_fileinfo(inode, netfid, file,
281 if (pCifsFile == NULL) {
282 CIFSSMBClose(xid, tcon, netfid);
286 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
287 if (tcon->ses->serverNOS)
288 cERROR(1, "server %s of type %s returned"
289 " unexpected error on SMB posix open"
290 ", disabling posix open support."
291 " Check if server update available.",
292 tcon->ses->serverName,
293 tcon->ses->serverNOS);
294 tcon->broken_posix_open = true;
295 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
296 (rc != -EOPNOTSUPP)) /* path not found or net err */
298 /* else fallthrough to retry open the old way on network i/o
302 desiredAccess = cifs_convert_flags(file->f_flags);
304 /*********************************************************************
305 * open flag mapping table:
307 * POSIX Flag CIFS Disposition
308 * ---------- ----------------
309 * O_CREAT FILE_OPEN_IF
310 * O_CREAT | O_EXCL FILE_CREATE
311 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
312 * O_TRUNC FILE_OVERWRITE
313 * none of the above FILE_OPEN
315 * Note that there is not a direct match between disposition
316 * FILE_SUPERSEDE (ie create whether or not file exists although
317 * O_CREAT | O_TRUNC is similar but truncates the existing
318 * file rather than creating a new file as FILE_SUPERSEDE does
319 * (which uses the attributes / metadata passed in on open call)
321 *? O_SYNC is a reasonable match to CIFS writethrough flag
322 *? and the read write flags match reasonably. O_LARGEFILE
323 *? is irrelevant because largefile support is always used
324 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
325 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
326 *********************************************************************/
328 disposition = cifs_get_disposition(file->f_flags);
330 /* BB pass O_SYNC flag through on file attributes .. BB */
332 /* Also refresh inode by passing in file_info buf returned by SMBOpen
333 and calling get_inode_info with returned buf (at least helps
334 non-Unix server case) */
336 /* BB we can not do this if this is the second open of a file
337 and the first handle has writebehind data, we might be
338 able to simply do a filemap_fdatawrite/filemap_fdatawait first */
339 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
345 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
346 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
347 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
348 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
349 & CIFS_MOUNT_MAP_SPECIAL_CHR);
351 rc = -EIO; /* no NT SMB support fall into legacy open below */
354 /* Old server, try legacy style OpenX */
355 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
356 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
357 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
358 & CIFS_MOUNT_MAP_SPECIAL_CHR);
361 cFYI(1, "cifs_open returned 0x%x", rc);
365 rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
369 pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
371 if (pCifsFile == NULL) {
376 if (oplock & CIFS_CREATE_ACTION) {
377 /* time to set mode which we can not set earlier due to
378 problems creating new read-only files */
379 if (tcon->unix_ext) {
380 struct cifs_unix_set_info_args args = {
381 .mode = inode->i_mode,
384 .ctime = NO_CHANGE_64,
385 .atime = NO_CHANGE_64,
386 .mtime = NO_CHANGE_64,
389 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
391 cifs_sb->mnt_cifs_flags &
392 CIFS_MOUNT_MAP_SPECIAL_CHR);
/*
 * cifs_relock_file - hook for re-establishing byte-range locks on a handle
 * after the session to the server was lost.
 *
 * The only body content visible in this listing is a to-do comment; no lock
 * is actually re-acquired by the lines shown.  cifs_reopen_file() calls this
 * after a successful reopen.
 *
 * NOTE(review): the embedded line numbers skip (406-408, 410-413), so the
 * local declaration and return statement are presumably missing from this
 * listing.
 */
403 /* Try to reacquire byte range locks that were released when session */
404 /* to server was lost */
405 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
409 /* BB list all locks open on this file and relock */
/*
 * cifs_reopen_file - obtain a fresh server handle for an open file whose
 * handle was invalidated (for example by a reconnect).
 *
 * @file:      the open file; its private_data is the cifsFileInfo to repair
 * @can_flush: callers on the write path pass false so that dirty data is
 *             not flushed from here (see the deadlock note near the end of
 *             the body)
 *
 * Flow, as far as it is visible in this listing:
 *  - pCifsFile->fh_mutex is taken; if the handle is not marked invalid the
 *    mutex is dropped again (nothing to do).
 *  - The dentry and inode are validated; failures jump to
 *    reopen_error_exit (label not visible here).
 *  - The full path is rebuilt from the dentry without taking the rename
 *    semaphore (the body comment explains why).
 *  - When the server supports POSIX path operations, cifs_posix_open() is
 *    tried first with a NULL inode pointer so that inode info is not
 *    refreshed; on error the code falls through to the traditional open.
 *  - CIFSSMBOpen() is called with disposition FILE_OPEN and no info buffer.
 *  - The lines after the open show fh_mutex being dropped with debug
 *    output, and separately the new netfid being stored and invalidHandle
 *    being cleared before the mutex is dropped; the branch structure
 *    between them is not visible in this listing.
 *  - Dirty pages are then written with filemap_write_and_wait(), caching is
 *    temporarily disabled and inode info is re-read from the server.
 *    Presumably this is guarded by can_flush: the guard line is not visible
 *    but the trailing "else" comment describes the no-flush case.
 *  - The caching flags are set from the low four bits of oplock, and
 *    cifs_relock_file() is called.
 *
 * NOTE(review): the embedded line numbers skip throughout, so declarations,
 * braces, several conditions and the return/cleanup code are presumably
 * missing from this listing - confirm against the original file.
 */
414 static int cifs_reopen_file(struct file *file, bool can_flush)
419 struct cifs_sb_info *cifs_sb;
420 struct cifsTconInfo *tcon;
421 struct cifsFileInfo *pCifsFile;
422 struct cifsInodeInfo *pCifsInode;
424 char *full_path = NULL;
426 int disposition = FILE_OPEN;
429 if (file->private_data)
430 pCifsFile = file->private_data;
435 mutex_lock(&pCifsFile->fh_mutex);
436 if (!pCifsFile->invalidHandle) {
437 mutex_unlock(&pCifsFile->fh_mutex);
443 if (file->f_path.dentry == NULL) {
444 cERROR(1, "no valid name if dentry freed");
447 goto reopen_error_exit;
450 inode = file->f_path.dentry->d_inode;
452 cERROR(1, "inode not valid");
455 goto reopen_error_exit;
458 cifs_sb = CIFS_SB(inode->i_sb);
459 tcon = cifs_sb->tcon;
461 /* can not grab rename sem here because various ops, including
462 those that already have the rename sem can end up causing writepage
463 to get called and if the server was down that means we end up here,
464 and we can never tell if the caller already has the rename_sem */
465 full_path = build_path_from_dentry(file->f_path.dentry);
466 if (full_path == NULL) {
469 mutex_unlock(&pCifsFile->fh_mutex);
474 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
475 inode, file->f_flags, full_path);
482 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
483 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
484 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
485 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
486 /* can not refresh inode info since size could be stale */
487 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
488 cifs_sb->mnt_file_mode /* ignored */,
489 oflags, &oplock, &netfid, xid);
491 cFYI(1, "posix reopen succeeded");
494 /* fallthrough to retry open the old way on errors, especially
495 in the reconnect path it is important to retry hard */
498 desiredAccess = cifs_convert_flags(file->f_flags);
500 /* Can not refresh inode by passing in file_info buf to be returned
501 by SMBOpen and then calling get_inode_info with returned buf
502 since file might have write behind data that needs to be flushed
503 and server version of file size can be stale. If we knew for sure
504 that inode was not dirty locally we could do this */
506 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
507 CREATE_NOT_DIR, &netfid, &oplock, NULL,
508 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
509 CIFS_MOUNT_MAP_SPECIAL_CHR);
511 mutex_unlock(&pCifsFile->fh_mutex);
512 cFYI(1, "cifs_open returned 0x%x", rc);
513 cFYI(1, "oplock: %d", oplock);
516 pCifsFile->netfid = netfid;
517 pCifsFile->invalidHandle = false;
518 mutex_unlock(&pCifsFile->fh_mutex);
519 pCifsInode = CIFS_I(inode);
522 rc = filemap_write_and_wait(inode->i_mapping);
524 CIFS_I(inode)->write_behind_rc = rc;
525 /* temporarily disable caching while we
526 go to server to get inode info */
527 pCifsInode->clientCanCacheAll = false;
528 pCifsInode->clientCanCacheRead = false;
530 rc = cifs_get_inode_info_unix(&inode,
531 full_path, inode->i_sb, xid);
533 rc = cifs_get_inode_info(&inode,
534 full_path, NULL, inode->i_sb,
536 } /* else we are writing out data to server already
537 and could deadlock if we tried to flush data, and
538 since we do not know if we have data that would
539 invalidate the current end of file on the server
540 we can not go to the server to get the new inod
542 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
543 pCifsInode->clientCanCacheAll = true;
544 pCifsInode->clientCanCacheRead = true;
545 cFYI(1, "Exclusive Oplock granted on inode %p",
546 file->f_path.dentry->d_inode);
547 } else if ((oplock & 0xF) == OPLOCK_READ) {
548 pCifsInode->clientCanCacheRead = true;
549 pCifsInode->clientCanCacheAll = false;
551 pCifsInode->clientCanCacheRead = false;
552 pCifsInode->clientCanCacheAll = false;
554 cifs_relock_file(pCifsFile);
/*
 * cifs_close - release the server handle and per-open state for @file.
 *
 * Steps visible in this listing:
 *  - Under GlobalSMBSeslock (write) the handle is marked closePend.
 *  - If the tcon does not need a reconnect, the lock is dropped and the
 *    code loops while the handle's reference count is not 1 and timeout is
 *    at most 2048, to give in-flight writes a chance to reach the server
 *    first (the delay/back-off statements are not visible here).  The
 *    handle is then closed with CIFSSMBClose() provided the tcon still does
 *    not need a reconnect and the handle is not invalid.
 *  - Stored byte-range lock records on pSMBFile->llist are unlinked under
 *    lock_mutex; the server drops the locks with the handle anyway.
 *  - The handle is unlinked from the flist and tlist lists under
 *    GlobalSMBSeslock, its reference is dropped with cifsFileInfo_put() and
 *    file->private_data is cleared.
 *  - If the inode's openFileList is now empty, clientCanCacheRead and
 *    clientCanCacheAll are cleared, since caching cannot be trusted without
 *    an open handle.
 *  - If rc is still 0, a pending write_behind_rc on the inode becomes the
 *    return code.
 *
 * NOTE(review): the embedded line numbers skip throughout, so declarations,
 * braces, the "if (pSMBFile)" style guards, the freeing of each lock record
 * and the final return are presumably missing from this listing.
 */
562 int cifs_close(struct inode *inode, struct file *file)
566 struct cifs_sb_info *cifs_sb;
567 struct cifsTconInfo *pTcon;
568 struct cifsFileInfo *pSMBFile = file->private_data;
572 cifs_sb = CIFS_SB(inode->i_sb);
573 pTcon = cifs_sb->tcon;
575 struct cifsLockInfo *li, *tmp;
576 write_lock(&GlobalSMBSeslock);
577 pSMBFile->closePend = true;
579 /* no sense reconnecting to close a file that is
581 if (!pTcon->need_reconnect) {
582 write_unlock(&GlobalSMBSeslock);
584 while ((atomic_read(&pSMBFile->count) != 1)
585 && (timeout <= 2048)) {
586 /* Give write a better chance to get to
587 server ahead of the close. We do not
588 want to add a wait_q here as it would
589 increase the memory utilization as
590 the struct would be in each open file,
591 but this should give enough time to
593 cFYI(DBG2, "close delay, write pending");
597 if (!pTcon->need_reconnect &&
598 !pSMBFile->invalidHandle)
599 rc = CIFSSMBClose(xid, pTcon,
602 write_unlock(&GlobalSMBSeslock);
604 write_unlock(&GlobalSMBSeslock);
606 /* Delete any outstanding lock records.
607 We'll lose them when the file is closed anyway. */
608 mutex_lock(&pSMBFile->lock_mutex);
609 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
610 list_del(&li->llist);
613 mutex_unlock(&pSMBFile->lock_mutex);
615 write_lock(&GlobalSMBSeslock);
616 list_del(&pSMBFile->flist);
617 list_del(&pSMBFile->tlist);
618 write_unlock(&GlobalSMBSeslock);
619 cifsFileInfo_put(file->private_data);
620 file->private_data = NULL;
624 read_lock(&GlobalSMBSeslock);
625 if (list_empty(&(CIFS_I(inode)->openFileList))) {
626 cFYI(1, "closing last open instance for inode %p", inode);
627 /* if the file is not open we do not know if we can cache info
628 on this inode, much less write behind and read ahead */
629 CIFS_I(inode)->clientCanCacheRead = false;
630 CIFS_I(inode)->clientCanCacheAll = false;
632 read_unlock(&GlobalSMBSeslock);
633 if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
634 rc = CIFS_I(inode)->write_behind_rc;
/*
 * cifs_closedir - release the search state attached to an open directory.
 *
 * If the directory search has not reached its end and the search handle is
 * still valid, the handle is marked invalid under GlobalSMBSeslock and
 * closed on the server with CIFSFindClose(); per the body comment a failure
 * there is ignored.  Any network buffer still referenced by the search state
 * is released with cifs_small_buf_release() or cifs_buf_release() depending
 * on srch_inf.smallBuf, and file->private_data is freed and cleared.
 *
 * NOTE(review): the embedded line numbers skip, so declarations (xid, rc,
 * ptmp), the guard on pCFileStruct, the else branches and the return are
 * presumably missing from this listing - confirm against the original file.
 */
639 int cifs_closedir(struct inode *inode, struct file *file)
643 struct cifsFileInfo *pCFileStruct = file->private_data;
646 cFYI(1, "Closedir inode = 0x%p", inode);
651 struct cifsTconInfo *pTcon;
652 struct cifs_sb_info *cifs_sb =
653 CIFS_SB(file->f_path.dentry->d_sb);
655 pTcon = cifs_sb->tcon;
657 cFYI(1, "Freeing private data in close dir");
658 write_lock(&GlobalSMBSeslock);
659 if (!pCFileStruct->srch_inf.endOfSearch &&
660 !pCFileStruct->invalidHandle) {
661 pCFileStruct->invalidHandle = true;
662 write_unlock(&GlobalSMBSeslock);
663 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
664 cFYI(1, "Closing uncompleted readdir with rc %d",
666 /* not much we can do if it fails anyway, ignore rc */
669 write_unlock(&GlobalSMBSeslock);
670 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
672 cFYI(1, "closedir free smb buf in srch struct");
673 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
674 if (pCFileStruct->srch_inf.smallBuf)
675 cifs_small_buf_release(ptmp);
677 cifs_buf_release(ptmp);
679 kfree(file->private_data);
680 file->private_data = NULL;
682 /* BB can we lock the filestruct while this is going on? */
/*
 * store_file_lock - remember a byte-range lock taken on an open handle.
 *
 * Allocates a cifsLockInfo with kmalloc(GFP_KERNEL) and links it onto
 * fid->llist while holding fid->lock_mutex.  cifs_lock() calls this for
 * Windows-style locks and later walks the list to undo them on unlock.
 *
 * NOTE(review): the embedded line numbers skip (689, 692-696, 700-702), so
 * the allocation-failure check, the copy of @len/@offset/@lockType into the
 * record and the return statement are presumably missing from this listing.
 */
687 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
688 __u64 offset, __u8 lockType)
690 struct cifsLockInfo *li =
691 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
697 mutex_lock(&fid->lock_mutex);
698 list_add(&li->llist, &fid->llist);
699 mutex_unlock(&fid->lock_mutex);
/*
 * cifs_lock - lock, unlock or test a byte range of @file on the server.
 *
 * Outline, as far as it is visible in this listing:
 *  - The request length is derived from fl_start/fl_end and the lock flags
 *    are logged (FL_SLEEP is reported as a blocking lock; FL_ACCESS and
 *    FL_LEASE are reported as not implemented).
 *  - fl_type is classified.  F_RDLCK and F_SHLCK add
 *    LOCKING_ANDX_SHARED_LOCK to lockType; the numLock/numUnlock
 *    assignments of each branch are not visible here.
 *  - POSIX byte-range locking is considered when the session has CAP_UNIX,
 *    the server advertises CIFS_UNIX_FCNTL_CAP and the mount does not set
 *    CIFS_MOUNT_NOPOSIXBRL (the statement under that condition is not
 *    visible; presumably it sets posix_locking).
 *  - Lock test path (its guard is not visible; the error strings say
 *    "during test of lock"): with POSIX locking CIFSSMBPosixLock() is
 *    called in "get" mode.  Otherwise the range is probed by taking the
 *    lock with CIFSSMBLock() and releasing it again, reporting F_UNLCK when
 *    the probe succeeds; further probes with a shared lock decide between
 *    reporting F_RDLCK and F_WRLCK.
 *  - Set path: nothing is done when neither a lock nor an unlock was
 *    requested.  With POSIX locking CIFSSMBPosixLock() is called in "set"
 *    mode.  Otherwise a lock is sent with CIFSSMBLock() and recorded with
 *    store_file_lock(); an unlock walks fid->llist under lock_mutex and
 *    releases every stored lock that the requested range covers completely.
 *  - For FL_POSIX requests posix_lock_file_wait() is finally called to
 *    update the local lock state.
 *
 * NOTE(review): the return value of posix_lock_file_wait() is not used in
 * the lines shown - confirm whether that is intentional.
 * NOTE(review): the embedded line numbers skip throughout, so declarations,
 * braces, several conditions, arguments and return statements are
 * presumably missing from this listing - confirm against the original file.
 */
703 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
709 bool wait_flag = false;
710 struct cifs_sb_info *cifs_sb;
711 struct cifsTconInfo *tcon;
713 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
714 bool posix_locking = 0;
/* the "1 +" treats fl_start..fl_end as an inclusive range */
716 length = 1 + pfLock->fl_end - pfLock->fl_start;
720 cFYI(1, "Lock parm: 0x%x flockflags: "
721 "0x%x flocktype: 0x%x start: %lld end: %lld",
722 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
725 if (pfLock->fl_flags & FL_POSIX)
727 if (pfLock->fl_flags & FL_FLOCK)
729 if (pfLock->fl_flags & FL_SLEEP) {
730 cFYI(1, "Blocking lock");
733 if (pfLock->fl_flags & FL_ACCESS)
734 cFYI(1, "Process suspended by mandatory locking - "
735 "not implemented yet");
736 if (pfLock->fl_flags & FL_LEASE)
737 cFYI(1, "Lease on file - not implemented yet");
738 if (pfLock->fl_flags &
739 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
740 cFYI(1, "Unknown lock flags 0x%x", pfLock->fl_flags);
742 if (pfLock->fl_type == F_WRLCK) {
745 } else if (pfLock->fl_type == F_UNLCK) {
748 /* Check if unlock includes more than
750 } else if (pfLock->fl_type == F_RDLCK) {
752 lockType |= LOCKING_ANDX_SHARED_LOCK;
754 } else if (pfLock->fl_type == F_EXLCK) {
757 } else if (pfLock->fl_type == F_SHLCK) {
759 lockType |= LOCKING_ANDX_SHARED_LOCK;
762 cFYI(1, "Unknown type of lock");
764 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
765 tcon = cifs_sb->tcon;
767 if (file->private_data == NULL) {
772 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
774 if ((tcon->ses->capabilities & CAP_UNIX) &&
775 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
776 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
778 /* BB add code here to normalize offset and length to
779 account for negative length which we can not accept over the
784 if (lockType & LOCKING_ANDX_SHARED_LOCK)
785 posix_lock_type = CIFS_RDLCK;
787 posix_lock_type = CIFS_WRLCK;
788 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
790 posix_lock_type, wait_flag);
795 /* BB we could chain these into one lock request BB */
796 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
797 0, 1, lockType, 0 /* wait flag */ );
799 rc = CIFSSMBLock(xid, tcon, netfid, length,
800 pfLock->fl_start, 1 /* numUnlock */ ,
801 0 /* numLock */ , lockType,
803 pfLock->fl_type = F_UNLCK;
805 cERROR(1, "Error unlocking previously locked "
806 "range %d during test of lock", rc);
810 /* if rc == ERR_SHARING_VIOLATION ? */
813 if (lockType & LOCKING_ANDX_SHARED_LOCK) {
814 pfLock->fl_type = F_WRLCK;
816 rc = CIFSSMBLock(xid, tcon, netfid, length,
817 pfLock->fl_start, 0, 1,
818 lockType | LOCKING_ANDX_SHARED_LOCK,
821 rc = CIFSSMBLock(xid, tcon, netfid,
822 length, pfLock->fl_start, 1, 0,
824 LOCKING_ANDX_SHARED_LOCK,
826 pfLock->fl_type = F_RDLCK;
828 cERROR(1, "Error unlocking "
829 "previously locked range %d "
830 "during test of lock", rc);
833 pfLock->fl_type = F_WRLCK;
843 if (!numLock && !numUnlock) {
844 /* if no lock or unlock then nothing
845 to do since we do not know what it is */
852 if (lockType & LOCKING_ANDX_SHARED_LOCK)
853 posix_lock_type = CIFS_RDLCK;
855 posix_lock_type = CIFS_WRLCK;
858 posix_lock_type = CIFS_UNLCK;
860 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
862 posix_lock_type, wait_flag);
864 struct cifsFileInfo *fid = file->private_data;
867 rc = CIFSSMBLock(xid, tcon, netfid, length,
869 0, numLock, lockType, wait_flag);
872 /* For Windows locks we must store them. */
873 rc = store_file_lock(fid, length,
874 pfLock->fl_start, lockType);
876 } else if (numUnlock) {
877 /* For each stored lock that this unlock overlaps
878 completely, unlock it. */
880 struct cifsLockInfo *li, *tmp;
883 mutex_lock(&fid->lock_mutex);
884 list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
885 if (pfLock->fl_start <= li->offset &&
886 (pfLock->fl_start + length) >=
887 (li->offset + li->length)) {
888 stored_rc = CIFSSMBLock(xid, tcon,
890 li->length, li->offset,
891 1, 0, li->type, false);
895 list_del(&li->llist);
900 mutex_unlock(&fid->lock_mutex);
904 if (pfLock->fl_flags & FL_POSIX)
905 posix_lock_file_wait(file, pfLock);
/*
 * cifs_write_timeout - choose the timeout class for a write starting at
 * @offset, relative to the server's known end of file (cifsi->server_eof).
 *
 * Three cases are distinguished: offset at or before server_eof, offset
 * more than 10 MiB (10 * 1024 * 1024 bytes) beyond server_eof, which
 * returns CIFS_VLONG_OP, and everything in between.  The original block
 * comment that follows gives the intended durations.
 *
 * NOTE(review): the embedded line numbers skip (910, 913, 918, 921-922, 924,
 * 926, 929-932), so the comment delimiters, the return type and the other
 * two return statements are presumably missing from this listing.
 */
911 * Set the timeout on write requests past EOF. For some servers (Windows)
912 * these calls can be very long.
914 * If we're writing >10M past the EOF we give a 180s timeout. Anything less
915 * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
916 * The 10M cutoff is totally arbitrary. A better scheme for this would be
917 * welcome if someone wants to suggest one.
919 * We may be able to do a better job with this if there were some way to
920 * declare that a file should be sparse.
923 cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
925 if (offset <= cifsi->server_eof)
927 else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
928 return CIFS_VLONG_OP;
/*
 * cifs_update_eof - advance the cached server end-of-file after a write.
 *
 * @cifsi:         CIFS inode whose server_eof is tracked
 * @offset:        file offset at which the write started
 * @bytes_written: number of bytes the server accepted
 *
 * server_eof only ever grows here: it is replaced when
 * offset + bytes_written lies beyond the current value.
 *
 * NOTE(review): the embedded line numbers skip (934, 937, 939, 942-943), so
 * the return type and braces are presumably missing from this listing.
 */
933 /* update the file size (if needed) after a write */
935 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
936 unsigned int bytes_written)
938 loff_t end_of_write = offset + bytes_written;
940 if (end_of_write > cifsi->server_eof)
941 cifsi->server_eof = end_of_write;
/*
 * cifs_user_write - write a user-space buffer to the server synchronously.
 *
 * @file:       open file; its private_data must hold the cifsFileInfo
 * @write_data: user-space source buffer
 * @write_size: number of bytes requested
 * @poffset:    in/out file position, advanced by the bytes written
 *
 * After generic_write_checks() the data is sent in chunks of at most
 * cifs_sb->wsize bytes.  Each chunk is retried while the result is -EAGAIN.
 * Before every attempt the function gives up and returns the bytes written
 * so far if the file was closed underneath it (private_data NULL) or a
 * close is pending.  An invalidated handle is reopened with
 * cifs_reopen_file(file, false) so that no flush is attempted from the
 * write path.  The first chunk uses the timeout chosen by
 * cifs_write_timeout(); later chunks use CIFS_STD_OP.
 *
 * After each successful chunk the cached server EOF and *poffset are
 * updated.  At the end the byte counter statistic is bumped and, if
 * anything was written, i_size is extended under inode->i_lock when
 * *poffset now lies beyond it; the inode is marked dirty.
 *
 * Returns the total number of bytes written on the paths shown; the error
 * returns are not visible in this listing.
 *
 * NOTE(review): min_t(const int, ...) narrows a size_t difference to int
 * before comparing it with wsize - confirm this cannot go negative for very
 * large requests.
 * NOTE(review): the embedded line numbers skip throughout, so declarations
 * (rc, xid, long_op), braces and error handling after CIFSSMBWrite() are
 * presumably missing from this listing.
 */
944 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
945 size_t write_size, loff_t *poffset)
948 unsigned int bytes_written = 0;
949 unsigned int total_written;
950 struct cifs_sb_info *cifs_sb;
951 struct cifsTconInfo *pTcon;
953 struct cifsFileInfo *open_file;
954 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
956 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
958 pTcon = cifs_sb->tcon;
960 /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
961 *poffset, file->f_path.dentry->d_name.name); */
963 if (file->private_data == NULL)
965 open_file = file->private_data;
967 rc = generic_write_checks(file, poffset, &write_size, 0);
973 long_op = cifs_write_timeout(cifsi, *poffset);
974 for (total_written = 0; write_size > total_written;
975 total_written += bytes_written) {
977 while (rc == -EAGAIN) {
978 if (file->private_data == NULL) {
979 /* file has been closed on us */
981 /* if we have gotten here we have written some data
982 and blocked, and the file has been freed on us while
983 we blocked so return what we managed to write */
984 return total_written;
986 if (open_file->closePend) {
989 return total_written;
993 if (open_file->invalidHandle) {
994 /* we could deadlock if we called
995 filemap_fdatawait from here so tell
996 reopen_file not to flush data to server
998 rc = cifs_reopen_file(file, false);
1003 rc = CIFSSMBWrite(xid, pTcon,
1005 min_t(const int, cifs_sb->wsize,
1006 write_size - total_written),
1007 *poffset, &bytes_written,
1008 NULL, write_data + total_written, long_op);
1010 if (rc || (bytes_written == 0)) {
1018 cifs_update_eof(cifsi, *poffset, bytes_written);
1019 *poffset += bytes_written;
1021 long_op = CIFS_STD_OP; /* subsequent writes fast -
1022 15 seconds is plenty */
1025 cifs_stats_bytes_written(pTcon, total_written);
1027 /* since the write may have blocked check these pointers again */
1028 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1029 struct inode *inode = file->f_path.dentry->d_inode;
1030 /* Do not update local mtime - server will set its actual value on write
1031 * inode->i_ctime = inode->i_mtime =
1032 * current_fs_time(inode->i_sb);*/
1033 if (total_written > 0) {
1034 spin_lock(&inode->i_lock);
1035 if (*poffset > file->f_path.dentry->d_inode->i_size)
1036 i_size_write(file->f_path.dentry->d_inode,
1038 spin_unlock(&inode->i_lock);
1040 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1043 return total_written;
/*
 * cifs_write - write a kernel buffer to the server synchronously.
 *
 * @file:       open file; its private_data must hold the cifsFileInfo
 * @write_data: kernel-space source buffer
 * @write_size: number of bytes requested
 * @poffset:    in/out file position, advanced by the bytes written
 *
 * Structured like cifs_user_write(): the data goes out in chunks of at most
 * cifs_sb->wsize bytes, and each chunk is retried while the result is
 * -EAGAIN.  The function returns the bytes written so far if the file was
 * closed underneath it or a close is pending, and reopens an invalidated
 * handle with cifs_reopen_file(file, false).
 *
 * Each chunk is sent either with CIFSSMBWrite2(), using iov[1] for the
 * payload (iov[0] is reserved for the SMB header), or with CIFSSMBWrite().
 * The choice depends on experimEnabled and on the server's signing bits in
 * secMode; the end of that condition is not visible in this listing, so its
 * polarity must be checked against the original file.
 *
 * After each successful chunk the cached server EOF and *poffset are
 * updated and later chunks use CIFS_STD_OP.  At the end the statistics
 * counter is bumped, i_size is extended under i_lock if needed and the
 * inode is marked dirty.
 *
 * NOTE(review): the embedded line numbers skip throughout, so declarations
 * (rc, xid, long_op, iov, len), braces and the error handling after the
 * write calls are presumably missing from this listing.
 */
1046 static ssize_t cifs_write(struct file *file, const char *write_data,
1047 size_t write_size, loff_t *poffset)
1050 unsigned int bytes_written = 0;
1051 unsigned int total_written;
1052 struct cifs_sb_info *cifs_sb;
1053 struct cifsTconInfo *pTcon;
1055 struct cifsFileInfo *open_file;
1056 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1058 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1060 pTcon = cifs_sb->tcon;
1062 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1063 *poffset, file->f_path.dentry->d_name.name);
1065 if (file->private_data == NULL)
1067 open_file = file->private_data;
1071 long_op = cifs_write_timeout(cifsi, *poffset);
1072 for (total_written = 0; write_size > total_written;
1073 total_written += bytes_written) {
1075 while (rc == -EAGAIN) {
1076 if (file->private_data == NULL) {
1077 /* file has been closed on us */
1079 /* if we have gotten here we have written some data
1080 and blocked, and the file has been freed on us
1081 while we blocked so return what we managed to
1083 return total_written;
1085 if (open_file->closePend) {
1088 return total_written;
1092 if (open_file->invalidHandle) {
1093 /* we could deadlock if we called
1094 filemap_fdatawait from here so tell
1095 reopen_file not to flush data to
1097 rc = cifs_reopen_file(file, false);
1101 if (experimEnabled || (pTcon->ses->server &&
1102 ((pTcon->ses->server->secMode &
1103 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1108 len = min((size_t)cifs_sb->wsize,
1109 write_size - total_written);
1110 /* iov[0] is reserved for smb header */
1111 iov[1].iov_base = (char *)write_data +
1113 iov[1].iov_len = len;
1114 rc = CIFSSMBWrite2(xid, pTcon,
1115 open_file->netfid, len,
1116 *poffset, &bytes_written,
1119 rc = CIFSSMBWrite(xid, pTcon,
1121 min_t(const int, cifs_sb->wsize,
1122 write_size - total_written),
1123 *poffset, &bytes_written,
1124 write_data + total_written,
1127 if (rc || (bytes_written == 0)) {
1135 cifs_update_eof(cifsi, *poffset, bytes_written);
1136 *poffset += bytes_written;
1138 long_op = CIFS_STD_OP; /* subsequent writes fast -
1139 15 seconds is plenty */
1142 cifs_stats_bytes_written(pTcon, total_written);
1144 /* since the write may have blocked check these pointers again */
1145 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1146 /*BB We could make this contingent on superblock ATIME flag too */
1147 /* file->f_path.dentry->d_inode->i_ctime =
1148 file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;*/
1149 if (total_written > 0) {
1150 spin_lock(&file->f_path.dentry->d_inode->i_lock);
1151 if (*poffset > file->f_path.dentry->d_inode->i_size)
1152 i_size_write(file->f_path.dentry->d_inode,
1154 spin_unlock(&file->f_path.dentry->d_inode->i_lock);
1156 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1159 return total_written;
#ifdef CONFIG_CIFS_EXPERIMENTAL
/*
 * find_readable_file - return an open handle on @cifs_inode usable for reads.
 *
 * Walks cifs_inode->openFileList with GlobalSMBSeslock held for reading and
 * returns the first entry that has no close pending, was not opened
 * write-only and still has a valid server handle.  A reference is taken with
 * cifsFileInfo_get() before the lock is dropped so the handle cannot be
 * closed underneath the caller; the caller is presumably expected to drop it
 * again with cifsFileInfo_put().  Returns NULL when no such handle exists.
 *
 * The access mode is tested through O_ACCMODE.  O_RDONLY is 0 on Linux, so
 * the previous "f_flags & O_RDONLY" test could never be true: handles opened
 * read-only were treated as write-only entries and ended the search.
 */
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file = NULL;

	read_lock(&GlobalSMBSeslock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (open_file->closePend)
			continue;

		/* an entry without a struct file, or a write-only one, ends
		   the search: write only files are last so we must be done */
		if (!open_file->pfile ||
		    (open_file->pfile->f_flags & O_ACCMODE) == O_WRONLY)
			break;

		if (!open_file->invalidHandle) {
			/* found a good file */
			/* lock it so it will not be closed on us */
			cifsFileInfo_get(open_file);
			read_unlock(&GlobalSMBSeslock);
			return open_file;
		}
		/* else might as well continue, and look for another, or
		   simply have the caller reopen it again rather than
		   trying to fix this handle */
	}
	read_unlock(&GlobalSMBSeslock);
	return NULL;
}
#endif
/*
 * find_writable_file - return an open handle on @cifs_inode usable for
 * writes, reopening an invalidated handle if necessary.
 *
 * A NULL @cifs_inode is reported with cERROR (the body comment explains
 * why the check exists).  Otherwise openFileList is walked with
 * GlobalSMBSeslock held for reading.  Entries with a close pending are
 * skipped.  For an entry opened O_RDWR or O_WRONLY a reference is taken with
 * cifsFileInfo_get(); if its handle is valid the lock is dropped (the return
 * statement is not visible here, but the body comment marks this as the
 * success case).
 *
 * An invalid handle is reopened with cifs_reopen_file(..., false) after
 * dropping the lock, because that call can block.  Whenever the entry turns
 * out to have a close pending after the lock was dropped, the reference is
 * released and the walk restarts at refind_writable, since the list may
 * have changed.  If the reopen failed and no close is pending, the walk
 * simply continues with the next entry so that reopen errors do not loop
 * forever.
 *
 * When the first pass finds nothing, any_available is set and the search is
 * repeated once without the pid restriction.
 *
 * NOTE(review): after cifsFileInfo_put() on the failed-reopen path the code
 * still reads open_file->closePend - confirm that the entry cannot have
 * been freed by that put.
 * NOTE(review): the embedded line numbers skip throughout, so the rc
 * declaration, the refind_writable label, braces, continue/return
 * statements are presumably missing from this listing.
 */
1193 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1195 struct cifsFileInfo *open_file;
1196 bool any_available = false;
1199 /* Having a null inode here (because mapping->host was set to zero by
1200 the VFS or MM) should not happen but we had reports of on oops (due to
1201 it being zero) during stress testcases so we need to check for it */
1203 if (cifs_inode == NULL) {
1204 cERROR(1, "Null inode passed to cifs_writeable_file");
1209 read_lock(&GlobalSMBSeslock);
1211 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
/* while any_available is false only handles whose pid equals current->tgid pass this test */
1212 if (open_file->closePend ||
1213 (!any_available && open_file->pid != current->tgid))
1216 if (open_file->pfile &&
1217 ((open_file->pfile->f_flags & O_RDWR) ||
1218 (open_file->pfile->f_flags & O_WRONLY))) {
1219 cifsFileInfo_get(open_file);
1221 if (!open_file->invalidHandle) {
1222 /* found a good writable file */
1223 read_unlock(&GlobalSMBSeslock);
1227 read_unlock(&GlobalSMBSeslock);
1228 /* Had to unlock since following call can block */
1229 rc = cifs_reopen_file(open_file->pfile, false);
1231 if (!open_file->closePend)
1233 else { /* start over in case this was deleted */
1234 /* since the list could be modified */
1235 read_lock(&GlobalSMBSeslock);
1236 cifsFileInfo_put(open_file);
1237 goto refind_writable;
1241 /* if it fails, try another handle if possible -
1242 (we can not do this if closePending since
1243 loop could be modified - in which case we
1244 have to start at the beginning of the list
1245 again. Note that it would be bad
1246 to hold up writepages here (rather than
1247 in caller) with continuous retries */
1248 cFYI(1, "wp failed on reopen file");
1249 read_lock(&GlobalSMBSeslock);
1250 /* can not use this handle, no write
1251 pending on this one after all */
1252 cifsFileInfo_put(open_file);
1254 if (open_file->closePend) /* list could have changed */
1255 goto refind_writable;
1256 /* else we simply continue to the next entry. Thus
1257 we do not loop on reopen errors. If we
1258 can not reopen the file, for example if we
1259 reconnected to a server with another client
1260 racing to delete or lock the file we would not
1261 make progress if we restarted before the beginning
1262 of the loop here. */
1265 /* couldn't find useable FH with same pid, try any available */
1266 if (!any_available) {
1267 any_available = true;
1268 goto refind_writable;
1270 read_unlock(&GlobalSMBSeslock);
/*
 * cifs_partialpagewrite - synchronously write part of a page cache page
 * to the server.
 * @page: page holding the data; its mapping and mapping->host must be set
 * @from: offset of the first byte within the page
 * @to:   end offset within the page; rejected if larger than
 *        PAGE_CACHE_SIZE or smaller than @from
 *
 * The file offset is derived from page->index plus @from. A write that
 * starts beyond the current i_size is treated as a race with truncate and
 * returns 0. @to is clamped so that the write does not extend the file.
 * A handle is obtained through find_writable_file(), the data is sent with
 * cifs_write(), and the handle reference is dropped again with
 * cifsFileInfo_put(). atime and mtime are then set to the current
 * filesystem time. When no writable handle exists this is only logged
 * through cFYI().
 *
 * NOTE(review): the clamp compares (i_size - offset) with @to although
 * offset already includes @from; the only caller visible here passes
 * from == 0, where this is harmless - confirm before using other ranges.
 */
1274 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1276 struct address_space *mapping = page->mapping;
1277 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1280 int bytes_written = 0;
1281 struct cifs_sb_info *cifs_sb;
1282 struct cifsTconInfo *pTcon;
1283 struct inode *inode;
1284 struct cifsFileInfo *open_file;
1286 if (!mapping || !mapping->host)
1289 inode = page->mapping->host;
1290 cifs_sb = CIFS_SB(inode->i_sb);
1291 pTcon = cifs_sb->tcon;
1293 offset += (loff_t)from;
1294 write_data = kmap(page);
1297 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1302 /* racing with truncate? */
1303 if (offset > mapping->host->i_size) {
1305 return 0; /* don't care */
1308 /* check to make sure that we are not extending the file */
1309 if (mapping->host->i_size - offset < (loff_t)to)
1310 to = (unsigned)(mapping->host->i_size - offset);
1312 open_file = find_writable_file(CIFS_I(mapping->host));
1314 bytes_written = cifs_write(open_file->pfile, write_data,
/* drop the reference taken by find_writable_file() */
1316 cifsFileInfo_put(open_file);
1317 /* Does mm or vfs already set times? */
1318 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1319 if ((bytes_written > 0) && (offset))
1321 else if (bytes_written < 0)
1324 cFYI(1, "No writeable filehandles for inode");
/*
 * cifs_writepages - write back dirty pages of a mapping, sending runs of
 * consecutive pages to the server in one CIFSSMBWrite2 call.
 * @mapping: address space whose dirty pages are written
 * @wbc:     writeback control; range_cyclic, range_start/range_end,
 *           sync_mode, nonblocking and nr_to_write are consulted here
 *
 * generic_writepages() is used instead when:
 *   - wsize is smaller than PAGE_CACHE_SIZE,
 *   - the server secMode has SECMODE_SIGN_REQUIRED or SECMODE_SIGN_ENABLED
 *     set and experimEnabled is not set,
 *   - on the path right after the kvec array allocation (presumably when
 *     kmalloc() fails; the test itself is not present in this copy).
 *
 * Otherwise dirty pages are collected with pagevec_lookup_tag(). The
 * per-page checks visible below are: trylock_page(), page->mapping still
 * equal to @mapping, index within the requested range, index equal to the
 * expected next index of the batch, and clear_page_dirty_for_io()
 * succeeding on a page not already under writeback. Accepted pages are
 * put under writeback, given an extra reference, kmap()ed and described
 * by an iov entry (iov[0] is reserved for the SMB header); the length of
 * the last page is limited by i_size. A batch is closed once another full
 * page would exceed wsize.
 *
 * For every batch a fresh handle is looked up with find_writable_file()
 * and released with cifsFileInfo_put() after the write. cifs_update_eof()
 * is called with the number of bytes written. On an error or a short
 * write AS_ENOSPC or AS_EIO is set on the mapping, otherwise the byte
 * statistics are updated. Finally writeback is ended and the extra
 * reference dropped for every page of the batch, and wbc->nr_to_write is
 * reduced by the number of iov entries used.
 */
1332 static int cifs_writepages(struct address_space *mapping,
1333 struct writeback_control *wbc)
1335 struct backing_dev_info *bdi = mapping->backing_dev_info;
1336 unsigned int bytes_to_write;
1337 unsigned int bytes_written;
1338 struct cifs_sb_info *cifs_sb;
1342 int range_whole = 0;
1349 struct cifsFileInfo *open_file;
1350 struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1352 struct pagevec pvec;
1357 cifs_sb = CIFS_SB(mapping->host->i_sb);
1360 * If wsize is smaller that the page cache size, default to writing
1361 * one page at a time via cifs_writepage
1363 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1364 return generic_writepages(mapping, wbc);
1366 if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1367 if (cifs_sb->tcon->ses->server->secMode &
1368 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1369 if (!experimEnabled)
1370 return generic_writepages(mapping, wbc);
/* scratch array of 32 kvecs, later handed to CIFSSMBWrite2() */
1372 iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1374 return generic_writepages(mapping, wbc);
1378 * BB: Is this meaningful for a non-block-device file system?
1379 * If it is, we should test it again after we do I/O
1381 if (wbc->nonblocking && bdi_write_congested(bdi)) {
1382 wbc->encountered_congestion = 1;
1389 pagevec_init(&pvec, 0);
1390 if (wbc->range_cyclic) {
1391 index = mapping->writeback_index; /* Start from prev offset */
1394 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1395 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1396 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1401 while (!done && (index <= end) &&
1402 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1403 PAGECACHE_TAG_DIRTY,
1404 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1413 for (i = 0; i < nr_pages; i++) {
1414 page = pvec.pages[i];
1416 * At this point we hold neither mapping->tree_lock nor
1417 * lock on the page itself: the page may be truncated or
1418 * invalidated (changing page->mapping to NULL), or even
1419 * swizzled back from swapper_space to tmpfs file
1425 else if (!trylock_page(page))
1428 if (unlikely(page->mapping != mapping)) {
1433 if (!wbc->range_cyclic && page->index > end) {
1439 if (next && (page->index != next)) {
1440 /* Not next consecutive page */
1445 if (wbc->sync_mode != WB_SYNC_NONE)
1446 wait_on_page_writeback(page);
1448 if (PageWriteback(page) ||
1449 !clear_page_dirty_for_io(page)) {
1455 * This actually clears the dirty bit in the radix tree.
1456 * See cifs_writepage() for more commentary.
1458 set_page_writeback(page);
1460 if (page_offset(page) >= mapping->host->i_size) {
1463 end_page_writeback(page);
1468 * BB can we get rid of this? pages are held by pvec
1470 page_cache_get(page);
1472 len = min(mapping->host->i_size - page_offset(page),
1473 (loff_t)PAGE_CACHE_SIZE);
1475 /* reserve iov[0] for the smb header */
1477 iov[n_iov].iov_base = kmap(page);
1478 iov[n_iov].iov_len = len;
1479 bytes_to_write += len;
1483 offset = page_offset(page);
1485 next = page->index + 1;
1486 if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1490 /* Search for a writable handle every time we call
1491 * CIFSSMBWrite2. We can't rely on the last handle
1492 * we used to still be valid
1494 open_file = find_writable_file(CIFS_I(mapping->host));
1496 cERROR(1, "No writable handles for inode");
1499 long_op = cifs_write_timeout(cifsi, offset);
1500 rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
1502 bytes_to_write, offset,
1503 &bytes_written, iov, n_iov,
1505 cifsFileInfo_put(open_file);
1506 cifs_update_eof(cifsi, offset, bytes_written);
1508 if (rc || bytes_written < bytes_to_write) {
1509 cERROR(1, "Write2 ret %d, wrote %d",
1511 /* BB what if continued retry is
1512 requested via mount flags? */
1514 set_bit(AS_ENOSPC, &mapping->flags);
1516 set_bit(AS_EIO, &mapping->flags);
1518 cifs_stats_bytes_written(cifs_sb->tcon,
/* finish writeback on every page that was part of this batch */
1522 for (i = 0; i < n_iov; i++) {
1523 page = pvec.pages[first + i];
1524 /* Should we also set page error on
1525 success rc but too little data written? */
1526 /* BB investigate retry logic on temporary
1527 server crash cases and how recovery works
1528 when page marked as error */
1533 end_page_writeback(page);
1534 page_cache_release(page);
1536 if ((wbc->nr_to_write -= n_iov) <= 0)
1540 /* Need to re-find the pages we skipped */
1541 index = pvec.pages[0]->index + 1;
1543 pagevec_release(&pvec);
1545 if (!scanned && !done) {
1547 * We hit the last page and there is more work to be done: wrap
1548 * back to the start of the file
1554 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1555 mapping->writeback_index = index;
/*
 * cifs_writepage - write one complete page to the server.
 * @page: page to write
 * @wbc:  writeback control; not examined by the lines visible here
 *
 * Takes an extra page reference, logs (cFYI) if the page is not uptodate,
 * marks the page as under writeback and writes the range
 * [0, PAGE_CACHE_SIZE) through cifs_partialpagewrite(). Afterwards the
 * page is flagged uptodate regardless of rc, writeback is ended and the
 * extra reference is dropped.
 */
1562 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1568 /* BB add check for wbc flags */
1569 page_cache_get(page);
1570 if (!PageUptodate(page))
1571 cFYI(1, "ppw - page not up to date");
1574 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1576 * A writepage() implementation always needs to do either this,
1577 * or re-dirty the page with "redirty_page_for_writepage()" in
1578 * the case of a failure.
1580 * Just unlocking the page will cause the radix tree tag-bits
1581 * to fail to update with the state of the page correctly.
1583 set_page_writeback(page);
1584 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1585 SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
1587 end_page_writeback(page);
1588 page_cache_release(page);
/*
 * cifs_write_end - finish a buffered write started by cifs_write_begin().
 * @file:    file being written
 * @mapping: address space of the inode
 * @pos:     file position where the copy started
 * @len:     number of bytes the caller intended to write
 * @copied:  number of bytes actually copied into @page
 * @page:    page that received the data
 * @fsdata:  not used by the lines visible here
 *
 * A page carrying PageChecked (set by cifs_write_begin() when the parts
 * not being written count as up to date) has that flag cleared and may be
 * marked uptodate; a page that received a full PAGE_CACHE_SIZE copy is
 * marked uptodate as well.
 *
 * If the page is still not uptodate, the copied bytes are written to the
 * server immediately with cifs_write() using the caller's file handle.
 * The set_page_dirty() call belongs to the other branch (the branch
 * keywords are not present in this copy).
 *
 * i_size is raised to @pos under inode->i_lock when @pos lies beyond it,
 * and a page reference is dropped with page_cache_release() before
 * returning.
 */
1593 static int cifs_write_end(struct file *file, struct address_space *mapping,
1594 loff_t pos, unsigned len, unsigned copied,
1595 struct page *page, void *fsdata)
1598 struct inode *inode = mapping->host;
1600 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1603 if (PageChecked(page)) {
1605 SetPageUptodate(page);
1606 ClearPageChecked(page);
1607 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1608 SetPageUptodate(page);
1610 if (!PageUptodate(page)) {
1612 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1616 /* this is probably better than directly calling
1617 partialpage_write since in this function the file handle is
1618 known which we might as well leverage */
1619 /* BB check if anything else missing out of ppw
1620 such as updating last write time */
1621 page_data = kmap(page);
1622 rc = cifs_write(file, page_data + offset, copied, &pos);
1623 /* if (rc < 0) should we set writebehind rc? */
1630 set_page_dirty(page);
/* extend the in-core file size if the write went past the old EOF */
1634 spin_lock(&inode->i_lock);
1635 if (pos > inode->i_size)
1636 i_size_write(inode, pos);
1637 spin_unlock(&inode->i_lock);
1641 page_cache_release(page);
/*
 * cifs_fsync - flush a file's cached data and ask the server to flush too.
 * @file:     open file; file->private_data is the cifsFileInfo handle
 * @datasync: only reported in the debug message by the lines visible here
 *
 * Dirty pages are written and waited for with filemap_write_and_wait().
 * The inode's write_behind_rc is read into rc and reset to 0 (the
 * condition guarding this is not present in this copy). If rc is still 0,
 * a tcon and a handle exist and the mount does not have
 * CIFS_MOUNT_NOSSYNC set, CIFSSMBFlush() is sent for the handle's netfid.
 */
1646 int cifs_fsync(struct file *file, int datasync)
1650 struct cifsTconInfo *tcon;
1651 struct cifsFileInfo *smbfile = file->private_data;
1652 struct inode *inode = file->f_path.dentry->d_inode;
1656 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1657 file->f_path.dentry->d_name.name, datasync);
1659 rc = filemap_write_and_wait(inode->i_mapping);
1661 rc = CIFS_I(inode)->write_behind_rc;
1662 CIFS_I(inode)->write_behind_rc = 0;
1663 tcon = CIFS_SB(inode->i_sb)->tcon;
1664 if (!rc && tcon && smbfile &&
1665 !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1666 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1673 /* static void cifs_sync_page(struct page *page)
1675 struct address_space *mapping;
1676 struct inode *inode;
1677 unsigned long index = page->index;
1678 unsigned int rpages = 0;
1681 cFYI(1, "sync page %p", page);
1682 mapping = page->mapping;
1685 inode = mapping->host;
1689 /* fill in rpages then
1690 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1692 /* cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
1702 * As file closes, flush all cached write data for this inode checking
1703 * for write behind errors.
/*
 * cifs_flush - start writeback of an inode's dirty pages and pick up any
 * pending write-behind error.
 * @file: file whose inode is flushed
 * @id:   not used by the lines visible here
 *
 * Writeback is started with filemap_fdatawrite(). The inode's
 * write_behind_rc is then read into rc and reset to 0; per the comment
 * below this happens when the dirty pages could be written (the test
 * itself is not present in this copy). The result is logged with cFYI().
 */
1705 int cifs_flush(struct file *file, fl_owner_t id)
1707 struct inode *inode = file->f_path.dentry->d_inode;
1710 /* Rather than do the steps manually:
1711 lock the inode for writing
1712 loop through pages looking for write behind data (dirty pages)
1713 coalesce into contiguous 16K (or smaller) chunks to write to server
1714 send to server (prefer in parallel)
1715 deal with writebehind errors
1716 unlock inode for writing
1717 filemapfdatawrite appears easier for the time being */
1719 rc = filemap_fdatawrite(inode->i_mapping);
1720 /* reset wb rc if we were able to write out dirty pages */
1722 rc = CIFS_I(inode)->write_behind_rc;
1723 CIFS_I(inode)->write_behind_rc = 0;
1726 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * cifs_user_read - read from the server directly into a user-space buffer.
 * @file:      open file; file->private_data must hold the cifsFileInfo
 * @read_data: user buffer receiving the data
 * @read_size: number of bytes requested
 * @poffset:   file position; advanced by the bytes read in each round
 *
 * The request is split into chunks (current_read_size). For every chunk
 * CIFSSMBRead() is retried while it returns -EAGAIN, reopening the handle
 * with cifs_reopen_file() first when it is marked invalidHandle and is
 * not closePend. Payload is copied out of the response buffer with
 * copy_to_user(), starting 4 bytes (RFC1001 length field) plus the
 * response's DataOffset into the buffer. The response buffer is then
 * returned with cifs_small_buf_release() or cifs_buf_release() depending
 * on buf_type.
 *
 * The loop ends on an error or a zero-length read; otherwise the byte
 * statistics and *poffset are advanced by bytes_read.
 *
 * A read on a handle opened O_WRONLY is only logged, not rejected here.
 */
1731 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1732 size_t read_size, loff_t *poffset)
1735 unsigned int bytes_read = 0;
1736 unsigned int total_read = 0;
1737 unsigned int current_read_size;
1738 struct cifs_sb_info *cifs_sb;
1739 struct cifsTconInfo *pTcon;
1741 struct cifsFileInfo *open_file;
1742 char *smb_read_data;
1743 char __user *current_offset;
1744 struct smb_com_read_rsp *pSMBr;
1747 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1748 pTcon = cifs_sb->tcon;
1750 if (file->private_data == NULL) {
1755 open_file = file->private_data;
1757 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1758 cFYI(1, "attempting read on write only file instance");
1760 for (total_read = 0, current_offset = read_data;
1761 read_size > total_read;
1762 total_read += bytes_read, current_offset += bytes_read) {
1763 current_read_size = min_t(const int, read_size - total_read,
1766 smb_read_data = NULL;
1767 while (rc == -EAGAIN) {
1768 int buf_type = CIFS_NO_BUFFER;
1769 if ((open_file->invalidHandle) &&
1770 (!open_file->closePend)) {
1771 rc = cifs_reopen_file(file, true);
1775 rc = CIFSSMBRead(xid, pTcon,
1777 current_read_size, *poffset,
1778 &bytes_read, &smb_read_data,
1780 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1781 if (smb_read_data) {
1782 if (copy_to_user(current_offset,
1784 4 /* RFC1001 length field */ +
1785 le16_to_cpu(pSMBr->DataOffset),
/* hand the response buffer back to the pool it came from */
1789 if (buf_type == CIFS_SMALL_BUFFER)
1790 cifs_small_buf_release(smb_read_data);
1791 else if (buf_type == CIFS_LARGE_BUFFER)
1792 cifs_buf_release(smb_read_data);
1793 smb_read_data = NULL;
1796 if (rc || (bytes_read == 0)) {
1804 cifs_stats_bytes_read(pTcon, bytes_read);
1805 *poffset += bytes_read;
/*
 * cifs_read - read from the server into a kernel buffer.
 * @file:      open file; file->private_data must hold the cifsFileInfo
 * @read_data: kernel buffer receiving the data
 * @read_size: number of bytes requested
 *
 * The position argument (used below as poffset) is declared on a line
 * that is not present in this copy.
 *
 * The request is split into chunks. When the session lacks
 * CAP_LARGE_FILES the chunk is additionally limited to the server's
 * maxBuf - 128, because such servers refuse larger reads (see the comment
 * in the loop). Each chunk is read with CIFSSMBRead(), retried while it
 * returns -EAGAIN, with a cifs_reopen_file() first when the handle is
 * marked invalidHandle and is not closePend. The loop ends on an error or
 * a zero-length read.
 *
 * NOTE(review): the token "¤t_offset" in the CIFSSMBRead() argument list
 * looks like a mis-encoded "&current_offset" - confirm against the
 * upstream source.
 * NOTE(review): the statistics call passes total_read, which at that
 * point does not yet include the current chunk (it is advanced in the
 * for-loop increment); cifs_user_read() passes bytes_read in the same
 * place - confirm which is intended.
 */
1813 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1817 unsigned int bytes_read = 0;
1818 unsigned int total_read;
1819 unsigned int current_read_size;
1820 struct cifs_sb_info *cifs_sb;
1821 struct cifsTconInfo *pTcon;
1823 char *current_offset;
1824 struct cifsFileInfo *open_file;
1825 int buf_type = CIFS_NO_BUFFER;
1828 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1829 pTcon = cifs_sb->tcon;
1831 if (file->private_data == NULL) {
1836 open_file = file->private_data;
1838 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1839 cFYI(1, "attempting read on write only file instance");
1841 for (total_read = 0, current_offset = read_data;
1842 read_size > total_read;
1843 total_read += bytes_read, current_offset += bytes_read) {
1844 current_read_size = min_t(const int, read_size - total_read,
1846 /* For windows me and 9x we do not want to request more
1847 than it negotiated since it will refuse the read then */
1849 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1850 current_read_size = min_t(const int, current_read_size,
1851 pTcon->ses->server->maxBuf - 128);
1854 while (rc == -EAGAIN) {
1855 if ((open_file->invalidHandle) &&
1856 (!open_file->closePend)) {
1857 rc = cifs_reopen_file(file, true);
1861 rc = CIFSSMBRead(xid, pTcon,
1863 current_read_size, *poffset,
1864 &bytes_read, ¤t_offset,
1867 if (rc || (bytes_read == 0)) {
1875 cifs_stats_bytes_read(pTcon, total_read);
1876 *poffset += bytes_read;
/*
 * cifs_file_mmap - mmap handler: revalidate the file, then map it.
 * @file: file being mapped
 * @vma:  target virtual memory area
 *
 * cifs_revalidate_file() is called first and a failure is logged; the
 * mapping itself is set up by generic_file_mmap().
 */
1883 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1888 rc = cifs_revalidate_file(file);
1890 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
1894 rc = generic_file_mmap(file, vma);
/*
 * cifs_copy_cache_pages - distribute a block of read data over pages
 * taken from a readahead list.
 * @mapping:    address space the pages are inserted into
 * @pages:      list of pages linked through page->lru
 * @bytes_read: number of valid bytes at @data
 * @data:       source buffer
 *
 * While data remains, the page at the tail of @pages is unlinked and
 * added to the page cache with add_to_page_cache_lru(). If that fails the
 * page is released and its share of the data (one PAGE_CACHE_SIZE) is
 * skipped. Otherwise one page worth of data is copied through a
 * kmap_atomic() mapping; a final partial page has its tail zeroed. The
 * page is then flushed from the dcache and marked uptodate.
 */
1900 static void cifs_copy_cache_pages(struct address_space *mapping,
1901 struct list_head *pages, int bytes_read, char *data)
1906 while (bytes_read > 0) {
1907 if (list_empty(pages))
/* consume pages from the tail of the list */
1910 page = list_entry(pages->prev, struct page, lru);
1911 list_del(&page->lru);
1913 if (add_to_page_cache_lru(page, mapping, page->index,
1915 page_cache_release(page);
1916 cFYI(1, "Add page cache failed");
1917 data += PAGE_CACHE_SIZE;
1918 bytes_read -= PAGE_CACHE_SIZE;
1921 page_cache_release(page);
1923 target = kmap_atomic(page, KM_USER0);
1925 if (PAGE_CACHE_SIZE > bytes_read) {
1926 memcpy(target, data, bytes_read);
1927 /* zero the tail end of this partial page */
1928 memset(target + bytes_read, 0,
1929 PAGE_CACHE_SIZE - bytes_read);
1932 memcpy(target, data, PAGE_CACHE_SIZE);
1933 bytes_read -= PAGE_CACHE_SIZE;
1935 kunmap_atomic(target, KM_USER0);
1937 flush_dcache_page(page);
1938 SetPageUptodate(page);
1940 data += PAGE_CACHE_SIZE;
/*
 * cifs_readpages - readahead: fill a list of pages with as few server
 * reads as possible.
 * @file:      open file; file->private_data must hold the cifsFileInfo
 * @mapping:   address space the pages belong to
 * @page_list: pages to fill, linked through page->lru
 * @num_pages: number of pages on @page_list
 *
 * For each round the page at the tail of @page_list gives the file
 * offset, and the pages with consecutive indexes that follow it are
 * counted (capped so that no more than @num_pages are handled in total).
 * The read size is that many pages, limited to rsize rounded down to a
 * page multiple. CIFSSMBRead() is retried while it returns -EAGAIN, with
 * a cifs_reopen_file() first when the handle is marked invalidHandle and
 * is not closePend; a response buffer obtained on an -EAGAIN round is
 * released before retrying.
 *
 * On success the payload (4 bytes RFC1001 header plus DataOffset into the
 * response) is handed to cifs_copy_cache_pages(), read accounting and
 * statistics are updated, and the page counter advances by the number of
 * pages covered, counting a trailing partial page as one. A read error or
 * a zero-byte read is logged. The response buffer is released after every
 * round and once more before the function exits.
 */
1945 static int cifs_readpages(struct file *file, struct address_space *mapping,
1946 struct list_head *page_list, unsigned num_pages)
1952 struct cifs_sb_info *cifs_sb;
1953 struct cifsTconInfo *pTcon;
1954 unsigned int bytes_read = 0;
1955 unsigned int read_size, i;
1956 char *smb_read_data = NULL;
1957 struct smb_com_read_rsp *pSMBr;
1958 struct cifsFileInfo *open_file;
1959 int buf_type = CIFS_NO_BUFFER;
1962 if (file->private_data == NULL) {
1967 open_file = file->private_data;
1968 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1969 pTcon = cifs_sb->tcon;
1971 cFYI(DBG2, "rpages: num pages %d", num_pages);
1972 for (i = 0; i < num_pages; ) {
1973 unsigned contig_pages;
1974 struct page *tmp_page;
1975 unsigned long expected_index;
1977 if (list_empty(page_list))
/* the tail of the list determines where this round starts */
1980 page = list_entry(page_list->prev, struct page, lru);
1981 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1983 /* count adjacent pages that we will read into */
1986 list_entry(page_list->prev, struct page, lru)->index;
1987 list_for_each_entry_reverse(tmp_page, page_list, lru) {
1988 if (tmp_page->index == expected_index) {
1994 if (contig_pages + i > num_pages)
1995 contig_pages = num_pages - i;
1997 /* for reads over a certain size could initiate async
2000 read_size = contig_pages * PAGE_CACHE_SIZE;
2001 /* Read size needs to be in multiples of one page */
2002 read_size = min_t(const unsigned int, read_size,
2003 cifs_sb->rsize & PAGE_CACHE_MASK);
2004 cFYI(DBG2, "rpages: read size 0x%x contiguous pages %d",
2005 read_size, contig_pages);
2007 while (rc == -EAGAIN) {
2008 if ((open_file->invalidHandle) &&
2009 (!open_file->closePend)) {
2010 rc = cifs_reopen_file(file, true);
2015 rc = CIFSSMBRead(xid, pTcon,
2018 &bytes_read, &smb_read_data,
2020 /* BB more RC checks ? */
2021 if (rc == -EAGAIN) {
2022 if (smb_read_data) {
2023 if (buf_type == CIFS_SMALL_BUFFER)
2024 cifs_small_buf_release(smb_read_data);
2025 else if (buf_type == CIFS_LARGE_BUFFER)
2026 cifs_buf_release(smb_read_data);
2027 smb_read_data = NULL;
2031 if ((rc < 0) || (smb_read_data == NULL)) {
2032 cFYI(1, "Read error in readpages: %d", rc);
2034 } else if (bytes_read > 0) {
2035 task_io_account_read(bytes_read);
2036 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2037 cifs_copy_cache_pages(mapping, page_list, bytes_read,
2038 smb_read_data + 4 /* RFC1001 hdr */ +
2039 le16_to_cpu(pSMBr->DataOffset));
2041 i += bytes_read >> PAGE_CACHE_SHIFT;
2042 cifs_stats_bytes_read(pTcon, bytes_read);
2043 if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
2044 i++; /* account for partial page */
2046 /* server copy of file can have smaller size
2048 /* BB do we need to verify this common case ?
2049 this case is ok - if we are at server EOF
2050 we will hit it on next read */
2055 cFYI(1, "No bytes read (%d) at offset %lld . "
2056 "Cleaning remaining pages from readahead list",
2057 bytes_read, offset);
2058 /* BB turn off caching and do new lookup on
2059 file size at server? */
2062 if (smb_read_data) {
2063 if (buf_type == CIFS_SMALL_BUFFER)
2064 cifs_small_buf_release(smb_read_data);
2065 else if (buf_type == CIFS_LARGE_BUFFER)
2066 cifs_buf_release(smb_read_data);
2067 smb_read_data = NULL;
2072 /* need to free smb_read_data buf before exit */
2073 if (smb_read_data) {
2074 if (buf_type == CIFS_SMALL_BUFFER)
2075 cifs_small_buf_release(smb_read_data);
2076 else if (buf_type == CIFS_LARGE_BUFFER)
2077 cifs_buf_release(smb_read_data);
2078 smb_read_data = NULL;
/*
 * cifs_readpage_worker - fill one page with data read from the server.
 * @file: file to read from
 * @page: page to fill
 *
 * The position argument (used below as poffset) is declared on a line
 * that is not present in this copy.
 *
 * Takes an extra page reference, maps the page with kmap() and reads up
 * to PAGE_CACHE_SIZE bytes into it with cifs_read(). The inode's atime is
 * set to the current filesystem time, a short read is padded with zeroes
 * up to the end of the page, and the page is flushed from the dcache and
 * marked uptodate. The extra reference is dropped before returning.
 */
2085 static int cifs_readpage_worker(struct file *file, struct page *page,
2091 page_cache_get(page);
2092 read_data = kmap(page);
2093 /* for reads over a certain size could initiate async read ahead */
2095 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2100 cFYI(1, "Bytes read %d", rc);
2102 file->f_path.dentry->d_inode->i_atime =
2103 current_fs_time(file->f_path.dentry->d_inode->i_sb);
/* zero whatever part of the page the read did not cover */
2105 if (PAGE_CACHE_SIZE > rc)
2106 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2108 flush_dcache_page(page);
2109 SetPageUptodate(page);
2114 page_cache_release(page);
/*
 * cifs_readpage - read a single page through cifs_readpage_worker().
 * @file: open file; a NULL file->private_data is handled as a special
 *        case (the handling itself is not present in this copy)
 * @page: page to fill; its index determines the file offset
 *
 * NOTE(review): the debug message prints the loff_t offset through (int)
 * casts, so large offsets are shown truncated.
 */
2118 static int cifs_readpage(struct file *file, struct page *page)
2120 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2126 if (file->private_data == NULL) {
2132 cFYI(1, "readpage %p at offset %d 0x%x\n",
2133 page, (int)offset, (int)offset);
2135 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - check whether any open handle on the inode was
 * opened for writing.
 * @cifs_inode: inode whose openFileList is examined
 *
 * Walks the list with GlobalSMBSeslock held for read, testing closePend
 * first, and looks for an entry whose pfile has O_RDWR or O_WRONLY set in
 * f_flags. The lock is released on both the match path and the
 * end-of-list path.
 * NOTE(review): the return statements are not present in this copy;
 * presumably non-zero is returned on a match and 0 otherwise - confirm.
 */
2143 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2145 struct cifsFileInfo *open_file;
2147 read_lock(&GlobalSMBSeslock);
2148 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2149 if (open_file->closePend)
2151 if (open_file->pfile &&
2152 ((open_file->pfile->f_flags & O_RDWR) ||
2153 (open_file->pfile->f_flags & O_WRONLY))) {
2154 read_unlock(&GlobalSMBSeslock);
2158 read_unlock(&GlobalSMBSeslock);
2162 /* We do not want to update the file size from server for inodes
2163 open for write - to avoid races with writepage extending
2164 the file - in the future we could consider allowing
2165 refreshing the inode only on increases in the file size
2166 but this is tricky to do without racing with writebehind
2167 page caching in the current Linux kernel design */
/*
 * @cifsInode:   inode whose cached size is about to be updated
 * @end_of_file: file size reported by the server
 *
 * Only when is_inode_writable() reports a handle open for write are the
 * further checks made: the CIFS_MOUNT_DIRECT_IO mount flag (no page cache
 * to corrupt) and whether the cached i_size is smaller than @end_of_file.
 * NOTE(review): the return statements are not present in this copy;
 * presumably those two cases and the not-open-for-write case return true
 * - confirm.
 */
2168 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2173 if (is_inode_writable(cifsInode)) {
2174 /* This inode is open for write at least once */
2175 struct cifs_sb_info *cifs_sb;
2177 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2178 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2179 /* since no page cache to corrupt on directio
2180 we can change size safely */
2184 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - prepare a page cache page for a buffered write.
 * @file:    file being written
 * @mapping: address space of the inode
 * @pos:     file position of the write
 * @len:     number of bytes to be written
 * @flags:   passed on to grab_cache_page_write_begin()
 * @pagep:   output: the prepared page (the assignment is not present in
 *           this copy)
 * @fsdata:  not used by the lines visible here
 *
 * The page is obtained with grab_cache_page_write_begin(). The page needs
 * no further preparation when it is already uptodate or when the write
 * covers a full page. With a read oplock (clientCanCacheRead) the server
 * read is also avoided when the page lies entirely beyond i_size, or when
 * the write starts at the page start and reaches at least to i_size: the
 * parts of the page not being written are zeroed and the page is flagged
 * PageChecked so that cifs_write_end() can mark it uptodate.
 *
 * Otherwise, unless the file was opened O_WRONLY, the page is read in
 * with cifs_readpage_worker(); its result is deliberately ignored because
 * cifs_write_end() falls back to a synchronous write when the page is not
 * uptodate.
 */
2192 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2193 loff_t pos, unsigned len, unsigned flags,
2194 struct page **pagep, void **fsdata)
2196 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2197 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2198 loff_t page_start = pos & PAGE_MASK;
2203 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
2205 page = grab_cache_page_write_begin(mapping, index, flags);
2211 if (PageUptodate(page))
2215 * If we write a full page it will be up to date, no need to read from
2216 * the server. If the write is short, we'll end up doing a sync write
2219 if (len == PAGE_CACHE_SIZE)
2223 * optimize away the read when we have an oplock, and we're not
2224 * expecting to use any of the data we'd be reading in. That
2225 * is, when the page lies beyond the EOF, or straddles the EOF
2226 * and the write will cover all of the existing data.
2228 if (CIFS_I(mapping->host)->clientCanCacheRead) {
2229 i_size = i_size_read(mapping->host);
2230 if (page_start >= i_size ||
2231 (offset == 0 && (pos + len) >= i_size)) {
2232 zero_user_segments(page, 0, offset,
2236 * PageChecked means that the parts of the page
2237 * to which we're not writing are considered up
2238 * to date. Once the data is copied to the
2239 * page, it can be set uptodate.
2241 SetPageChecked(page);
2246 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2248 * might as well read a page, it is fast enough. If we get
2249 * an error, we don't need to return it. cifs_write_end will
2250 * do a sync write instead since PG_uptodate isn't set.
2252 cifs_readpage_worker(file, page, &page_start);
2254 /* we could try using another file handle if there is one -
2255 but how would we lock it to prevent close of that handle
2256 racing with this read? In any case
2257 this will be written out by write_end so is fine */
/*
 * cifs_oplock_break - slow-work handler run when the server breaks an
 * oplock on an open file.
 * @work: slow_work item embedded in the affected cifsFileInfo
 *
 * For a regular file, local leases are broken first (with O_RDONLY when
 * read caching is still allowed, with O_WRONLY on the other branch) and
 * writeback of dirty pages is started with filemap_fdatawrite(). When
 * read caching is no longer allowed (clientCanCacheRead == 0) the
 * function also waits for the writeback and invalidates the cached inode
 * data. The result is stored in the inode's write_behind_rc (the
 * condition guarding the store is not present in this copy).
 *
 * Unless the handle is closePend or the break was cancelled, the oplock
 * release is acknowledged to the server with a CIFSSMBLock() call using
 * LOCKING_ANDX_OPLOCK_RELEASE.
 */
2265 cifs_oplock_break(struct slow_work *work)
2267 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2269 struct inode *inode = cfile->pInode;
2270 struct cifsInodeInfo *cinode = CIFS_I(inode);
2271 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->mnt->mnt_sb);
2274 if (inode && S_ISREG(inode->i_mode)) {
2275 if (cinode->clientCanCacheRead)
2276 break_lease(inode, O_RDONLY);
2278 break_lease(inode, O_WRONLY);
2279 rc = filemap_fdatawrite(inode->i_mapping);
2280 if (cinode->clientCanCacheRead == 0) {
2281 waitrc = filemap_fdatawait(inode->i_mapping);
2282 invalidate_remote_inode(inode);
2287 cinode->write_behind_rc = rc;
2288 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2292 * releasing stale oplock after recent reconnect of smb session using
2293 * a now incorrect file handle is not a data integrity issue but do
2294 * not bother sending an oplock release if session to server still is
2295 * disconnected since oplock already released by the server
2297 if (!cfile->closePend && !cfile->oplock_break_cancelled) {
2298 rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0,
2299 LOCKING_ANDX_OPLOCK_RELEASE, false);
2300 cFYI(1, "Oplock release rc = %d", rc);
/*
 * cifs_oplock_break_get - slow-work get_ref callback: take a reference
 * on the cifsFileInfo that embeds @work, via cifsFileInfo_get().
 */
2305 cifs_oplock_break_get(struct slow_work *work)
2307 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2310 cifsFileInfo_get(cfile);
/*
 * cifs_oplock_break_put - slow-work put_ref callback: drop the reference
 * on the cifsFileInfo that embeds @work, via cifsFileInfo_put().
 */
2315 cifs_oplock_break_put(struct slow_work *work)
2317 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2320 cifsFileInfo_put(cfile);
/*
 * Slow-work operations for oplock break handling: get_ref/put_ref take
 * and drop a cifsFileInfo reference, execute is the break handler itself.
 */
2323 const struct slow_work_ops cifs_oplock_break_ops = {
2324 .get_ref = cifs_oplock_break_get,
2325 .put_ref = cifs_oplock_break_put,
2326 .execute = cifs_oplock_break,
/*
 * Address space operations table wiring up the page cache callbacks
 * defined in this file, including the multi-page cifs_readpages.
 * Dirtying uses the generic __set_page_dirty_nobuffers.
 */
2329 const struct address_space_operations cifs_addr_ops = {
2330 .readpage = cifs_readpage,
2331 .readpages = cifs_readpages,
2332 .writepage = cifs_writepage,
2333 .writepages = cifs_writepages,
2334 .write_begin = cifs_write_begin,
2335 .write_end = cifs_write_end,
2336 .set_page_dirty = __set_page_dirty_nobuffers,
2337 /* .sync_page = cifs_sync_page, */
2342 * cifs_readpages requires the server to support a buffer large enough to
2343 * contain the header plus one complete page of data. Otherwise, we need
2344 * to leave cifs_readpages out of the address space operations.
/*
 * Same callbacks as cifs_addr_ops except that .readpages is left out, so
 * reads go through cifs_readpage only. Intended for servers whose buffer
 * cannot hold the header plus one complete page of data.
 */
2346 const struct address_space_operations cifs_addr_ops_smallbuf = {
2347 .readpage = cifs_readpage,
2348 .writepage = cifs_writepage,
2349 .writepages = cifs_writepages,
2350 .write_begin = cifs_write_begin,
2351 .write_end = cifs_write_end,
2352 .set_page_dirty = __set_page_dirty_nobuffers,
2353 /* .sync_page = cifs_sync_page, */